In [1]:
%load_ext autoreload
%autoreload 2

# Imports

In [1]:
cd ../src

/home/theo/Kaggle/seversal/src


In [2]:
from util import *
from metric import *
from params import *
from imports import *
from post_process import *
from pseudo_labeling import *

Kept 1547 images out of 1801
Number of defects of class 0 : 75
Number of defects of class 1 : 3
Number of defects of class 2 : 564
Number of defects of class 3 : 93


In [3]:
from data.masks import *
from data.dataset import *
from data.transforms import *
from data.transforms import get_transforms as transfos

In [4]:
from training.train import *
from training.freezing import *
from training.predicting import *

In [5]:
from model_zoo.unet import *
from model_zoo.fpn_enetb5 import *
from model_zoo.unet_densenet169 import *
from model_zoo.unet_seresnext50 import *

In [6]:
from tqdm import tqdm_notebook as tqdm

# Notebook-wide setup: plotting style, a wall-clock reference and warning filters.
sns.set_style('white')
KERNEL_START_TIME = time.time()  # timestamp taken when this cell is executed
# Hide UserWarning and FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
# warnings.simplefilter(action='ignore', category=RuntimeWarning)
print('Number of available cores :', multiprocessing.cpu_count())

Number of available cores : 16


# Parameters

In [7]:
# Seed used here and passed to k_fold_training further down (fold split + per-fold seeding).
seed = 2019
seed_everything(seed)  # project helper — presumably seeds python/numpy/torch; TODO confirm

In [8]:
# Input locations, relative to the current working directory
# (the notebook changes into ../src in its first cells).
TRAIN_IMG_PATH = '../input/train_images/'
TEST_IMG_PATH = '../input/test_images/'
DATA_PATH = '../input/'  # folder holding train.csv and sample_submission.csv

In [9]:
# Size of the input images — presumably (height, width); TODO confirm against the dataset code.
IMG_SHAPE = (256, 1600)

In [10]:
# Per-channel normalisation constants (these are the commonly used ImageNet RGB mean / std values).
# NOTE(review): not referenced in the visible cells — presumably consumed by the transforms; confirm.
MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.229, 0.224, 0.225])

In [11]:
# Data-loading and inference settings.
NUM_WORKERS = 4

# Validation batch size: 1 for the seresnext backbone (4 was used with resnet).
VAL_BS = 1

# Run on the GPU whenever one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data

In [12]:
# Dated checkpoint directory for this run: '../checkpoints/<YYYY-MM-DD>/'.
CP_PATH = f'../checkpoints/{date.today()}/'
# os.makedirs(..., exist_ok=True) also creates a missing '../checkpoints' parent and
# does not fail when the directory already exists; the previous
# `if not os.path.exists(...): os.mkdir(...)` raised FileNotFoundError on a fresh
# checkout and could race with another process creating the same folder.
os.makedirs(CP_PATH, exist_ok=True)

In [13]:
# Load the training annotations; rows without a mask get the sentinel string '-1'.
df_train = pd.read_csv(DATA_PATH + 'train.csv')
# Assign the filled column back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form is deprecated in recent pandas and does not
# modify df_train at all once Copy-on-Write is enabled.
df_train['EncodedPixels'] = df_train['EncodedPixels'].fillna('-1')

# The count assumes 4 rows (one per defect class) per image.
print('Number of training images : ', len(df_train) // 4)

Number of training images :  12568


In [14]:
# 'ImageId_ClassId' has the form '<image>_<class>': keep each half in its own column.
df_train['ImageId'] = df_train['ImageId_ClassId'].map(lambda key: key.split('_')[0])
df_train['ClassId'] = df_train['ImageId_ClassId'].map(lambda key: key.split('_')[1])

# One row per image holding the list of its encoded masks ...
group_img = (
    df_train[['ImageId', 'EncodedPixels']]
    .groupby('ImageId')
    .agg(list)
)
# ... expanded into one column per list position, with string column names ('0', '1', ...).
rep_classes = group_img['EncodedPixels'].apply(pd.Series).rename(columns=str)
# Number of masks of the image that are not the '-1' "no defect" sentinel.
rep_classes['ClassNumber'] = group_img['EncodedPixels'].map(
    lambda rles: sum(1 for rle in rles if rle != "-1")
)

all_images = rep_classes.index.values
hard_negatives = rep_classes.index[rep_classes['ClassNumber'] == 0].values
positives = rep_classes.index[rep_classes['ClassNumber'] > 0].values

print('Number of images with defaults: ', len(positives))

Number of images with defaults:  6666


In [15]:
# {image_id: {'0': encoded_mask, '1': encoded_mask, ...}} built from the per-position columns.
mask_dic = rep_classes.drop('ClassNumber', axis=1).to_dict('index')

## Pseudo Labels

In [16]:
# Probability above which a predicted defect, and below which a predicted absence,
# is considered confident.
THRESHOLD_CONFIDENT_FAULT = 0.4
THRESHOLD_CONFIDENT_NO_FAULT = 0.2

# One id per test image: strip the last two characters of 'ImageId_ClassId'
# and keep every 4th row.
IMG_TEST = (
    pd.read_csv(DATA_PATH + 'sample_submission.csv')['ImageId_ClassId']
    .apply(lambda x: x[:-2])
    .values[::4]
)

pl_probs = np.load('../output/all_probs_test.npy')
PL_LABELS = pl_probs

# Two prediction files; missing masks are turned into empty strings.
pl_df = pd.read_csv('../output/pl_9195.csv').fillna('')
pl_df['EncodedPixels2'] = pd.read_csv('../output/pl_9193.csv').fillna('')['EncodedPixels']

pl_df['probs'] = PL_LABELS.flatten()

# 1 when the corresponding file holds a non-empty mask for the row, 0 otherwise.
pl_df['faulty'] = pl_df['EncodedPixels'].map(lambda rle: int(len(rle) > 0))
pl_df['faulty2'] = pl_df['EncodedPixels2'].map(lambda rle: int(len(rle) > 0))

# The element-wise minimum over the three indicators acts as a logical AND:
# a row is a confident positive (negative) only if the probability and both
# prediction files agree.
pl_df['kept_pos'] = np.min(
    (
        pl_df['probs'] > THRESHOLD_CONFIDENT_FAULT,
        pl_df['faulty'],
        pl_df['faulty2'],
    ),
    axis=0,
)
pl_df['kept_neg'] = np.min(
    (
        pl_df['probs'] < THRESHOLD_CONFIDENT_NO_FAULT,
        1 - pl_df['faulty'],
        1 - pl_df['faulty2'],
    ),
    axis=0,
)
pl_df['kept'] = (pl_df['kept_pos'] + pl_df['kept_neg']).gt(0)

# Rows are grouped by 4 consecutive entries; a group is kept only if all 4 rows are kept.
kept_imgs = pl_df['kept'].values.reshape(-1, 4).all(axis=1)
pl_df['kept_all'] = np.repeat(kept_imgs, 4)

In [17]:
# Defect indicator of every row that belongs to a kept image.
kept_classes = pl_df.loc[pl_df['kept_all'], 'faulty']
print(f'Kept {kept_classes.shape[0] / 4 :.0f} images out of {len(IMG_TEST)}')
# Rows come in groups of 4, so every 4th row starting at `class_idx` is one class.
for class_idx in range(4):
    n_defects = kept_classes.iloc[class_idx::4].sum()
    print(f'Number of defects of class {class_idx} :', n_defects)

Kept 1547 images out of 1801
Number of defects of class 0 : 75
Number of defects of class 1 : 3
Number of defects of class 2 : 564
Number of defects of class 3 : 93


In [18]:
# Rows of the images retained as pseudo-labels. The explicit .copy() makes PL_DF an
# independent frame: columns are added to it in the next cell, which on a plain
# filtered slice raises SettingWithCopyWarning.
PL_DF = pl_df.loc[pl_df['kept_all'], ['ImageId_ClassId', 'EncodedPixels', 'faulty']].copy()

In [19]:
# 'ImageId_ClassId' has the form '<image>_<class>': keep each half in its own column.
PL_DF['ImageId'] = PL_DF['ImageId_ClassId'].map(lambda key: key.split('_')[0])
PL_DF['ClassId'] = PL_DF['ImageId_ClassId'].map(lambda key: key.split('_')[1])

# One row per image holding the list of its encoded masks, then one column per
# list position with string column names ('0', '1', ...).
group_img_pl = (
    PL_DF[['ImageId', 'EncodedPixels']]
    .groupby('ImageId')
    .agg(list)
)
rep_classes_pl = group_img_pl['EncodedPixels'].apply(pd.Series).rename(columns=str)
# Number of non-empty masks of the image ('' marks "no defect" here).
rep_classes_pl['ClassNumber'] = group_img_pl['EncodedPixels'].map(
    lambda rles: sum(1 for rle in rles if rle != "")
)

In [20]:
# Ids of the pseudo-labelled images and their masks as
# {image_id: {'0': encoded_mask, '1': encoded_mask, ...}}.
# NOTE(review): neither name is referenced in the visible cells — presumably read by
# SteelTrainDatasetPL / the pseudo_labeling module; confirm.
PL_IMAGES = rep_classes_pl.index.values
MASK_DIC_PL = rep_classes_pl.drop('ClassNumber', axis=1).to_dict('index')

In [21]:
# get_classes is a project helper. Its two results are used below as the per-image
# `classes` array given to the datasets and as the stratification target `classes_max`.
classes, classes_max = get_classes(all_images, mask_dic)

In [22]:
# Training dataset over all images, with transforms built for crop_size=512; used by the
# next cell to plot a few samples.
# NOTE(review): `positives` is passed in the slot that k_fold_training fills with
# kept_imgs=... — confirm against the SteelTrainDataset signature.
dataset = SteelTrainDataset(mask_dic, all_images, classes, transfos(crop_size=512), positives)

In [23]:
# Visual sanity check: plot 10 randomly drawn samples with their masks.
# The check stays best-effort (a plotting problem must not stop the notebook), but the
# failure is now reported instead of being silently discarded by a bare `except: pass`.
try:
    for i in np.random.choice(range(len(dataset)), 10):
        img, masks, c = dataset[i]
        plot_masks(img[0], masks)
except Exception as err:  # Exception only, so a kernel interrupt still propagates
    print(f'Could not plot dataset samples: {err!r}')

<Figure size 1080x360 with 0 Axes>

## Training function

In [24]:
_N_TRAINING_STAGES = 6  # number of successive training stages run by k_fold_training


def _per_stage(values, name, n_stages=_N_TRAINING_STAGES):
    """Return a list holding one entry of `values` per training stage.

    A single-element sequence is repeated for every stage. Longer sequences must
    provide at least `n_stages` entries; entries beyond that are left unused.

    Raises:
        ValueError: if `values` is empty or has between 2 and `n_stages` - 1 entries.
    """
    values = list(values)
    if len(values) == 1:
        return values * n_stages
    if len(values) < n_stages:
        raise ValueError(
            f"`{name}` must contain 1 or at least {n_stages} entries, got {len(values)}"
        )
    return values


def k_fold_training(create_model, backbone, images, classes, classes_max, mask_dic, transforms_dic,
                    kept_for_training=([],), k=5, selected_fold=0, use_aux_clf=False,
                    batch_sizes=(32,), epochs=(5,), seed=2019, lr=(1e-3,), min_lrs=(1e-5,),
                    verbose=1, save=True, cp=False, warmup_props=(0.1,), model_name="model", pretrained_path=''):
    """Train one fold of a stratified K-fold split through 6 successive stages.

    The stages are, in order: frozen encoder, unfrozen encoder, "512x256 crops",
    "1024x256 crops", "full images" and "1024x256 crops and pseudo-labels" (names taken
    from the banners printed before each stage). Stage `s` (0-based) uses entry `s` of
    every per-stage argument and reads / writes the checkpoint
    "{model_name}_{selected_fold + 1}_{s}.pt".

    Args:
        create_model: callable building the model. Called with `SETTINGS[backbone]` as
            first argument when `backbone` is a key of `SETTINGS`, without it otherwise;
            always with num_classes=4, center_block="aspp" and aux_clf=use_aux_clf.
        backbone: backbone name, looked up in `SETTINGS`.
        images: image ids; indexed with the arrays of fold indices.
        classes: per-image values forwarded to the datasets; indexed like `images`.
        classes_max: stratification target passed to `StratifiedKFold.split`.
        mask_dic: masks forwarded to the datasets.
        transforms_dic: dict with a "val" entry and a "train" sequence (one per stage).
        kept_for_training: per-stage value forwarded to the datasets as `kept_imgs`.
        k: number of folds.
        selected_fold: 0-based index of the fold to train.
        use_aux_clf: forwarded to `create_model` and `fit_seg`.
        batch_sizes, epochs, lr, min_lrs, warmup_props: per-stage values forwarded to
            `fit_seg`.
        seed: random state of the split; `seed + selected_fold` is given to
            `seed_everything`.
        verbose: forwarded to `fit_seg`.
        save: when true (and `cp` is false), save the weights after each stage.
        cp: when true, load the stage checkpoint after each stage instead of saving.
        model_name: prefix of the checkpoint file names.
        pretrained_path: when non-empty, weights loaded into the model before training.

    Every per-stage argument may hold a single entry, which is then used for all stages
    (this is what makes the one-element defaults usable), or at least 6 entries.

    Raises:
        ValueError: if a per-stage argument is empty or has between 2 and 5 entries.
            This is checked before anything is trained.
    """
    # Validate the per-stage settings up front so that a too-short list fails immediately
    # rather than after several stages have already been trained.
    stage_transforms = _per_stage(transforms_dic["train"], 'transforms_dic["train"]')
    stage_kept = _per_stage(kept_for_training, "kept_for_training")
    stage_batch_sizes = _per_stage(batch_sizes, "batch_sizes")
    stage_epochs = _per_stage(epochs, "epochs")
    stage_lrs = _per_stage(lr, "lr")
    stage_min_lrs = _per_stage(min_lrs, "min_lrs")
    stage_warmups = _per_stage(warmup_props, "warmup_props")

    splits = list(StratifiedKFold(n_splits=k, shuffle=True, random_state=seed).split(images, classes_max))
    train_idx, val_idx = splits[selected_fold]
    i = selected_fold

    print(f"-------------   Fold {i + 1}  -------------")
    seed_everything(seed + i)

    if backbone in SETTINGS.keys():
        model = create_model(SETTINGS[backbone], num_classes=4, center_block="aspp", aux_clf=use_aux_clf)
    else:
        model = create_model(num_classes=4, center_block="aspp", aux_clf=use_aux_clf)

    if len(pretrained_path):
        load_model_weights(model, pretrained_path)

    val_dataset = SteelValDataset(mask_dic, images[val_idx], classes[val_idx], transforms_dic["val"])

    # One row per stage: (training dataset class, encoder toggle or None, banner).
    # When a toggle is given it is applied first and the trainable parameters are
    # counted for the banner.
    # NOTE(review): the third stage uses SteelTrainDatasetPL although its banner does not
    # mention pseudo-labels — kept exactly as before; confirm that this is intended.
    stages = (
        (SteelTrainDataset, freeze_encoder,
         '\n - Training with 256x256 crops - Frozen encoder: {n_parameters} trainable parameters\n'),
        (SteelTrainDataset, unfreeze_encoder,
         '\n - Training with 256x256 crops - {n_parameters} trainable parameters\n'),
        (SteelTrainDatasetPL, None, '\n - Training with 512x256 crops \n'),
        (SteelTrainDataset, None, '\n - Training with 1024x256 crops \n'),
        (SteelTrainDataset, None, '\n - Training with full images \n'),
        (SteelTrainDatasetPL, None, '\n - Training with 1024x256 crops and pseudo-labels\n'),
    )

    n_parameters = None
    for stage, (dataset_cls, toggle_encoder, banner) in enumerate(stages):
        if toggle_encoder is not None:
            toggle_encoder(model)
            n_parameters = count_parameters(model)

        print(banner.format(n_parameters=n_parameters))

        train_dataset = dataset_cls(mask_dic, images[train_idx], classes[train_idx], stage_transforms[stage],
                                    kept_imgs=stage_kept[stage])

        fit_seg(model, train_dataset, val_dataset, epochs=stage_epochs[stage], batch_size=stage_batch_sizes[stage],
                lr=stage_lrs[stage], min_lr=stage_min_lrs[stage], schedule='cosine', use_aux_clf=use_aux_clf,
                warmup_prop=stage_warmups[stage], verbose=verbose)

        # NOTE(review): with cp=True the stage is still fitted before the checkpoint is
        # loaded — behaviour kept as is; confirm whether fitting should be skipped.
        weights_name = f"{model_name}_{i + 1}_{stage}.pt"
        if cp:
            load_model_weights(model, weights_name, verbose=1)
        elif save:
            save_model_weights(model, weights_name, verbose=1)


# Training

## Parameters

In [25]:
# Backbone name; k_fold_training looks it up in SETTINGS to build the model.
backbone = 'se_resnext50_32x4d' #'resnet34'
build_model = SegmentationUnet  # model constructor handed to k_fold_training
model_name = "unet_" + backbone  # prefix of the checkpoint file names

In [26]:
# One list of image ids per training step; an empty list is reported below as
# "all images". A comprehension is used so that the 7 entries are distinct lists
# ([[]] * 7 repeats the very same list object 7 times).
kept_images = [[] for _ in range(7)]

# The loop variable is deliberately not named `kept_imgs`: that name is already bound
# by the pseudo-label cell and would be overwritten here.
for step, step_imgs in enumerate(kept_images, start=1):
    n_kept = len(step_imgs) if len(step_imgs) else len(all_images)
    print(f"Number of images kept for training at step {step} :", n_kept)

Number of images kept for training at step 1 : 12568
Number of images kept for training at step 2 : 12568
Number of images kept for training at step 3 : 12568
Number of images kept for training at step 4 : 12568
Number of images kept for training at step 5 : 12568
Number of images kept for training at step 6 : 12568
Number of images kept for training at step 7 : 12568


In [27]:
k = 5  # number of stratified folds
selected_fold = 0  # 0-based index of the single fold trained by this notebook

In [28]:
# Transforms of each training step, in order. crop_size=0 is presumably "no crop"
# (it is the entry used by the "full images" step) — TODO confirm in get_transforms.
transforms_dic = {"train": [transfos(crop_size=256),
                            transfos(crop_size=256),
                            transfos(crop_size=512),
                            transfos(crop_size=1024),
                            transfos(crop_size=0),
                            transfos(crop_size=1024),
                            transfos(crop_size=0)],
                  "val": transfos(test=True)}

# True: one epoch per step and no warm-up (smoke test; this is what the notebook was
# last run with). False: the full schedule. The two settings used to be written as
# consecutive assignments to the same names, the second silently overriding the first.
QUICK_RUN = True

# batch_sizes = [32, 32, 16, 8, 4] # resnet
batch_sizes = [16, 16, 8, 4, 2, 4]

if QUICK_RUN:
    epochs = [1] * 7
    warmup_props = [0] * 7
else:
    epochs = [5, 30, 30, 20, 15, 20]
    warmup_props = [0, 0.1, 0.1, 0.1, 0.1, 0.2]

lrs = [1e-3, 1e-3, 5e-4, 5e-5, 1e-5, 5e-5]

min_lrs = [1e-3, 1e-4, 5e-6, 1e-6, 1e-6, 1e-6, 1e-6] # serenext

In [29]:
# NOTE(review): `tta` is not referenced in the visible cells — presumably test-time
# augmentation for a later inference step; confirm or remove.
tta = True
use_aux_clf = True  # forwarded to k_fold_training (model construction and fit_seg)

## Train

In [30]:
# Stop right away when no GPU is visible.
assert torch.cuda.is_available(), 'Training on GPU is mandatory'

# Train the selected fold with the settings defined in the cells above.
k_fold_training(
    build_model,
    backbone,
    all_images,
    classes,
    classes_max,
    mask_dic,
    transforms_dic,
    kept_images,
    k=k,
    selected_fold=selected_fold,
    use_aux_clf=use_aux_clf,
    batch_sizes=batch_sizes,
    epochs=epochs,
    warmup_props=warmup_props,
    lr=lrs,
    min_lrs=min_lrs,
    verbose=1,
    save=True,
    cp=False,
    model_name=model_name,
    seed=seed,
)

-------------   Fold 1  -------------

 - Training with 256x256 crops - Frozen encoder: 68178409 trainable parameters

Epoch 1/1     lr=1.0e-03     t=359s     loss=2.4777     dice=0.85944     val_loss=1.1329     

 -> Saving weights to ../checkpoints/unet_se_resnext50_32x4d_1_0.pt


 - Training with 256x256 crops - 93621081 trainable parameters

Epoch 1/1     lr=1.0e-04     t=409s     loss=0.4815     dice=0.87876     val_loss=0.7910     

 -> Saving weights to ../checkpoints/unet_se_resnext50_32x4d_1_1.pt


 - Training with 512x256 crops 

Epoch 1/1     lr=5.0e-06     t=642s     loss=0.4997     dice=0.89123     val_loss=0.6569     

 -> Saving weights to ../checkpoints/unet_se_resnext50_32x4d_1_2.pt


 - Training with 1024x256 crops 



RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 10.76 GiB total capacity; 6.26 GiB already allocated; 3.38 MiB free; 448.25 MiB cached)