**About**: This notebook prepares the 2D segmentation masks and then trains models.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os

# Restrict the visible GPUs before torch is imported, so the setting is in
# place no matter when CUDA ends up being initialised.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

import torch

print(torch.__version__)

# `device` holds a human-readable device name and is only printed here.
# Fall back to "cpu" instead of raising so the data-preparation cells can
# still be run on a machine without a GPU.
if torch.cuda.is_available():
    device = torch.cuda.get_device_name(0)
else:
    device = "cpu"
print(device)

In [None]:
import os
import re
import sys
import glob
import torch
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.metrics import *

# Wider console output and longer cell contents when displaying DataFrames.
# Both keys are fully qualified: abbreviated keys only work while they match
# exactly one registered option.
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', 100)

In [None]:
from params import *
from data.dataset import *
from data.preparation import *
from data.transforms import get_transfos

from model_zoo.models import define_model
from training.losses import *

## Preparation

In [None]:
# Load the two tables returned by prepare_data; df_img is the one carrying the
# 'series', 'patient_id' and 'frame' columns used by the cells below.
df_patient, df_img = prepare_data(DATA_PATH)

In [None]:
def load_segmentation(path):
    """
    Load a NIfTI segmentation volume and reorient it.

    The chain of transposes, rotation and flips below nets out to reversing
    the axis order and flipping the middle axis, i.e. for an input of shape
    (X, Y, Z) the result has shape (Z, Y, X) with
    ``out[z, y, x] == volume[x, Y - 1 - y, z]``.

    Args:
        path (str): Path to the .nii file.

    Returns:
        np.ndarray: Reoriented volume; iterating over it yields 2D slices.
    """
    volume = nib.load(path).get_fdata()

    # Swap the first two axes, then rotate by 90 degrees in the plane
    # spanned by the last two axes.
    rotated = np.rot90(volume.transpose(1, 0, 2), k=1, axes=(1, 2))

    # Flip the leading axis and swap the first two axes back.
    reoriented = rotated[::-1].transpose(1, 0, 2)

    # Final flip along the (new) leading axis.
    return reoriented[::-1, :, :]

In [None]:
# Smoke test of the loader on a single segmentation volume.
# Uses `load_segmentation`, the loader defined in this notebook; the name
# `create_3D_segmentations` called here before is not defined or explicitly
# imported anywhere in the notebook.
filepath = '../input/segmentations/21057.nii'
volume_seg = load_segmentation(filepath)
print(f'3D segmentation file shape: {volume_seg.shape}')

In [None]:
def _series_id_from_path(nii_path):
    """Return the integer series id encoded in a `<series>.nii` file name."""
    file_name = nii_path.split('/')[-1]
    return int(file_name[:-4])


# One row per series: the maximum patient_id and frame observed for it
series = (
    df_img.groupby('series')[['patient_id', "frame"]]
    .max()
    .reset_index()
)

# One row per segmentation file, keyed by the series id found in its name
segs = pd.DataFrame({"path": glob.glob("../input/segmentations/*.nii")})
segs['series'] = segs['path'].apply(_series_id_from_path)

# Inner join: only segmentations whose series is listed in df_img are kept
segs = segs.merge(series, on="series")
segs = segs[["patient_id", "series", "frame", "path"]]

In [None]:
def load_series(patient_id, series, img_path=""):
    """
    Read every frame of one series as a grayscale image.

    Frames are the files matching ``{img_path}{patient_id}_{series}_*``,
    taken in sorted file-name order.

    Args:
        patient_id: Patient identifier used in the file names.
        series: Series identifier used in the file names.
        img_path (str, optional): Prefix prepended to the pattern. Defaults to "".

    Returns:
        np.ndarray: Array built from the list of loaded frames.
    """
    pattern = img_path + f"{patient_id}_{series}_*"
    frame_paths = glob.glob(pattern)
    frame_paths.sort()

    frames = []
    for frame_path in frame_paths:
        frames.append(cv2.imread(frame_path, cv2.IMREAD_GRAYSCALE))
    return np.array(frames)

In [None]:
# Folder receiving the per-slice PNG masks (created if missing)
SAVE_FOLDER = "../input/segs/"
os.makedirs(SAVE_FOLDER, exist_ok=True)

# Switches for the export loop: write the PNG masks / draw the previews
SAVE, PLOT = True, True

In [None]:
# Segmentation label value -> organ name
labels = {
    1: "liver",
    2: "spleen",
    3: "left-kidney",
    4: "right-kidney",
    5: "bowel",
}

# Colour legend: a single row with one cell per label value, drawn with the
# same colormap as the mask overlays.
legend_row = np.arange(1, 6)[None]
plt.imshow(legend_row, cmap='Set3', alpha=0.5)

# Write each organ name in the middle of its cell
for i in range(len(labels)):
    organ = labels[i + 1]
    plt.text(i, 0, organ, horizontalalignment='center', verticalalignment='center')

plt.axis(False)
plt.show()

# Export every slice of every annotated series as a PNG mask and, when PLOT
# is set, preview a few slices overlaid on the matching image frames.

# Loop-invariant: the output folder is defined and created once, up front.
SAVE_FOLDER = "../input/segs/"
os.makedirs(SAVE_FOLDER, exist_ok=True)

for idx in tqdm(range(len(segs))):
    patient_id = segs['patient_id'][idx]
    # Named series_id so the per-series DataFrame `series` is not overwritten
    series_id = segs['series'][idx]

    imgs = sorted(glob.glob("../input/imgs/" + f"{patient_id}_{series_id}_*"))
    seg = load_segmentation(segs['path'][idx])

    # Slices previewed: those at 1/5, 2/5, 3/5 and 4/5 of the image series.
    # With no image frame on disk there is nothing to overlay, so no preview
    # is requested (previously this ended in an IndexError on imgs[0]).
    ids = [i * len(imgs) // 5 for i in range(1, 5)] if imgs else []

    if PLOT:
        plt.figure(figsize=(20, 5))

    for i, frame in enumerate(seg):
        if SAVE:
            # NOTE(review): `frame` is written with whatever dtype
            # load_segmentation returns; cv2.imwrite is relied on to handle it.
            cv2.imwrite(SAVE_FOLDER + f"{patient_id}_{series_id}_{i:04d}.png", frame)

        if PLOT and i in ids:
            plt.subplot(1, len(ids), ids.index(i) + 1)
            img = cv2.imread(imgs[i], cv2.IMREAD_GRAYSCALE)

            plt.imshow(img, cmap='gray')
            # Hide unlabelled (zero) pixels so only the organs are tinted
            mask = np.where(frame, frame, np.nan)
            plt.imshow(mask, cmap='Set3', alpha=0.3)
            plt.axis(False)
            plt.title(f'Frame {i}')

    if PLOT:
        plt.show()

In [None]:
def _name_token(mask_path, position):
    """Return the `position`-th underscore-separated token of the file name."""
    return mask_path.split('/')[-1].split('_')[position]


def _to_img_path(mask_path):
    """Map a mask path to the image path by swapping the folder name."""
    return re.sub("/segs/", "/imgs/", mask_path)


# One row per exported mask; file names are `<patient_id>_<series>_<frame>.png`
mask_paths = sorted(glob.glob('../input/segs/*.png'))
df_seg = pd.DataFrame({"mask_path": mask_paths})

df_seg['patient_id'] = df_seg['mask_path'].apply(lambda p: int(_name_token(p, 0)))
df_seg['series'] = df_seg['mask_path'].apply(lambda p: int(_name_token(p, 1)))
# The last token still carries the 4-character extension
df_seg['frame'] = df_seg['mask_path'].apply(lambda p: int(_name_token(p, 2)[:-4]))

df_seg['img_path'] = df_seg['mask_path'].apply(_to_img_path)

column_order = ["patient_id", "series", "frame", "img_path", "mask_path"]
df_seg = df_seg[column_order]

df_seg.head()

In [None]:
# For every saved mask, count how many pixels carry each label value.
# Bin 0 collects the unlabelled (zero) pixels, bins 1.. follow `labels`.
n_bins = max(labels) + 1

pixel_counts = []
for i in tqdm(range(len(df_seg))):
    mask_path = df_seg['mask_path'][i]
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # cv2.imread reports unreadable files by returning None
        raise FileNotFoundError(f"Could not read mask {mask_path}")

    # minlength pads absent labels with zeros so every row has n_bins entries
    counts = np.bincount(mask.ravel(), minlength=n_bins)
    if len(counts) != n_bins:
        raise ValueError(
            f"{mask_path} contains label values above {n_bins - 1}"
        )

    pixel_counts.append(counts)

# Float dtype matches the columns produced by the zero-initialised buffers
# used before; the reshape keeps the table 2D even when df_seg is empty.
pixel_counts = np.array(pixel_counts, dtype=float).reshape(-1, n_bins)
for k in labels:
    df_seg[f'pixel_count_{labels[k]}'] = pixel_counts[:, k]

In [None]:
# Preview the table again now that the pixel_count_* columns were added
df_seg.head()

In [None]:
# Persist the per-frame table (paths + pixel counts) without the index column
SEG_CSV_PATH = '../input/df_seg.csv'
df_seg.to_csv(SEG_CSV_PATH, index=False)
print('-> Saved df to ', SEG_CSV_PATH)

In [None]:
# Column-wise maxima, as a quick range check of ids, frames and pixel counts
df_seg.max()

In [None]:
# Visual check: overlay the mask on a randomly chosen frame that contains a
# large bowel area (more than 10000 labelled pixels).
candidates = df_seg[df_seg['pixel_count_bowel'] > 10000].index
i = np.random.choice(candidates)

img = cv2.imread(df_seg['img_path'][i], cv2.IMREAD_GRAYSCALE)
mask = cv2.imread(df_seg['mask_path'][i], cv2.IMREAD_GRAYSCALE)

plt.imshow(img, cmap='gray')
# Zero pixels become NaN so only labelled regions are tinted
plt.imshow(np.where(mask, mask, np.nan), cmap='Set3', alpha=0.3)
plt.axis(False)
# `i` is only the row label in df_seg; the title shows the real frame number
plt.title(f"Frame {df_seg['frame'][i]}")
plt.show()

## Data

In [None]:
# Transform pipeline used for this inspection dataset
transforms = get_transfos(
    augment=True,
    resize=(512, 512),
    strength=3,
)

# Dataset restricted to the first 100 rows of the patient table
dataset = Abdominal2DDataset(
    df_patient.head(100),
    df_img,
    transforms=transforms,
    train=False,
    pos_prop=0.5,
)

In [None]:
# Sanity check that the image-level target returned by the dataset matches
# dataset.img_targets. The `break` stops after the first sample; `img`,
# `y_img` and `y_patient` stay defined and are reused by the model cells.
for i in tqdm(range(len(dataset))):
    sample = dataset[i]
    img, y_img, y_patient = sample

    targets_match = y_img.numpy() == dataset.img_targets[i]
    assert targets_match.all()
    break

In [None]:
# for i in range(10):
# #     i = 0
#     img, y_patient, y_img = dataset[i]

#     plt.figure(figsize=(8, 8))
#     plt.imshow(img.numpy().transpose(1, 2, 0))
    
#     plt.title(f"#{i} - {IMAGE_TARGETS} - {y_img}\n{PATIENT_TARGETS} - {y_patient}")
#     plt.show()

#     break

## Model

In [None]:
# Build a model for the forward-pass check below; calling it returns two
# outputs (`pred`, `pred_aux`) whose sizes are inspected in the next cells.
model = define_model(
    'tf_efficientnetv2_s',
    num_classes=2,
    num_classes_aux=11,
    n_channels=3,
    reduce_stride=False,
    drop_path_rate=0.,
    use_gem=True
)

In [None]:
# Two-sample batch made of the same image twice, then a forward pass
single = img.unsqueeze(0)
x = torch.cat([single, single])

pred, pred_aux = model(x)

In [None]:
# Sizes of the two model outputs
pred.size(), pred_aux.size()
# y_patient.size(), y_img.size()

In [None]:
# Duplicate the targets of the sample so they line up with the 2-sample batch
y_p = torch.cat([y_patient.unsqueeze(0) for _ in range(2)])
y_i = torch.cat([y_img.unsqueeze(0) for _ in range(2)])

In [None]:
# Loss used for the check below, built from a config dict.
# NOTE(review): Config.loss_config further down also carries "activation",
# "activation_aux" and "ousm_k" — confirm AbdomenLoss does not require them.
loss = AbdomenLoss({
    "name": "image",
    "smoothing": 0,
    "aux_loss_weight": 0.5,
    "name_aux": "patient",
    "smoothing_aux": 0,
})

In [None]:
# Compare the size of the main prediction with the size of its target batch
pred.size(), y_i.size()

In [None]:
# Evaluate the loss on the two model outputs and the two target batches
loss(pred, pred_aux, y_i, y_p)

In [None]:
# loss = PatientLoss()
# loss(pred_aux, y_p)

## Training

In [None]:
class Config:
    """
    Parameters used for training.

    The class is used as a plain namespace: settings are class attributes
    and the class object itself is passed to the helpers that consume it.
    """
    # General
    seed = 42
    verbose = 1
    device = "cuda"
    save_weights = True

    # Data
    resize = (512, 512)
    aug_strength = 1
    pos_prop = 0.1

    # k-fold
    k = 4
    folds_file = f"../input/folds_{k}.csv"  # file name follows k
    selected_folds = [0]  # , 1, 2, 3]

    # Model
    name = "tf_efficientnet_b0"
    pretrained_weights = None  # PRETRAINED_WEIGHTS[name]  # None

    num_classes = 2
    num_classes_aux = 0
    drop_rate = 0
    drop_path_rate = 0
    n_channels = 3
    reduce_stride = False
    replace_pad_conv = False
    use_gem = True

    # Training
    loss_config = {
        "name": "image",
        "smoothing": 0,
        "activation": "sigmoid",
        "aux_loss_weight": 0,
        "name_aux": "patient",
        "smoothing_aux": 0,
        "activation_aux": "",
        "ousm_k": 0,  # todo ?
    }

    data_config = {
        "batch_size": 16,
        "val_bs": 16,
        "mix": "mixup",
        "mix_proba": 0.,
        "mix_alpha": 4.,
        "additive_mix": False,
        "num_classes": num_classes,  # mirrors the model setting above
        "num_workers": 8,
    }

    optimizer_config = {
        "name": "AdamW",
        "lr": 5e-4,
        "warmup_prop": 0.,
        "betas": (0.9, 0.999),
        "max_grad_norm": 10.,
        "weight_decay": 0.,
    }

    epochs = 1

    use_fp16 = True

    # `verbose` is defined once, in the General section above
    verbose_eval = 50

    fullfit = False
    n_fullfit = 1

In [None]:
# DEBUG skips the logging setup in the next cell; log_folder and run keep
# these None defaults unless that cell replaces them.
DEBUG = True
log_folder = None
run = None

In [None]:
# Outside DEBUG mode: create a log folder, dump the configuration to
# config.json in it and start a file logger. In DEBUG mode none of this runs.
if not DEBUG:
    log_folder = prepare_log_folder(LOG_PATH)
    print(f"Logging results to {log_folder}")
    config_df = save_config(Config, log_folder + "config.json")
    create_logger(directory=log_folder, name="logs.txt")
#     run = init_neptune(Config, log_folder)

# Re-run the data preparation (same call as at the top of the notebook)
df_patient, df_img = prepare_data(DATA_PATH)
# NOTE(review): init_distributed, k_fold, prepare_log_folder, save_config and
# create_logger have no explicit import in this notebook — presumably they
# come in through one of the star imports; confirm.
init_distributed(Config)

preds = k_fold(Config, df_patient, df_img, log_folder=log_folder, run=run)

Done ! 