In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt
import random

In [3]:
import cv2
import torch
from fastai import *
from fastai.vision import *
from fastai.callbacks import *

In [4]:
from dataset_spliter import SplitByPatient
from metrics import *#F1Weighted, MCC
from losses import *

In [5]:
import pretrainedmodels

In [6]:
# Root directory of the training images. It is scanned for image files below
# and also handed to ItemLists when the dataset is assembled.
# NOTE(review): absolute, machine-specific location — adjust when running elsewhere.
path = Path('/data/Datasets/WhiteBloodCancer/train/')

In [7]:
# Seed the Python, NumPy and PyTorch random number generators together.
# Seeding NumPy alone leaves the other two generators in an arbitrary state,
# so anything drawing from them would differ between kernel restarts.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [8]:
# Collect the image files below `path`, descending into sub-directories.
fnames = get_image_files(path, recurse=True)
# Display the first five entries as a quick check of the folder/file naming.
fnames[:5]

[PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H10_43_1_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H22_31_15_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H14_9_11_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H14_28_6_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H10_189_1_hem.bmp')]

In [9]:
# Patient-id patterns applied to the file paths; capture group 1 is the id.
# hem_regex: 'H' followed by digits between 'UID_' and the next '_'
#            (e.g. 'H10' in 'UID_H10_43_1_hem.bmp').
# all_regex: digits only between 'UID_' and the next '_'.
# Both patterns are compiled case-insensitively.
hem_regex = re.compile(r'UID_(H[0-9]+)_', re.IGNORECASE)
all_regex = re.compile(r'UID_([0-9]+)_', re.IGNORECASE)

In [10]:
def _group_by_patient(regex, file_names):
    """Map each patient id (capture group 1 of `regex`) to its files.

    Files keep the order they have in `file_names`; files the pattern does
    not match are skipped. The ids are returned in sorted order so the
    result is identical on every kernel start. The previous
    ``list(set(...))`` construction followed Python's per-process string
    hash randomisation and therefore changed between runs.
    """
    groups = {}
    # Single pass over the files instead of re-scanning all files per patient.
    for fn in file_names:
        match = regex.search(str(fn))
        if match is not None:
            groups.setdefault(match.group(1), []).append(fn)
    return {patient_id: groups[patient_id] for patient_id in sorted(groups)}


# patient id -> list of image paths, one dictionary per class.
# The (misspelled) names are kept because later cells refer to them.
hem_patients = _group_by_patient(hem_regex, fnames)
all_patints = _group_by_patient(all_regex, fnames)

hem_patient_ids = list(hem_patients)
all_patint_ids = list(all_patints)




## Split data into training and validation sets

In [11]:
# Splitting helper from the local dataset_spliter module, built from the two
# per-patient dictionaries (patient id -> image paths) for the hem and all classes.
split_handler = SplitByPatient(hem_patients, all_patints)

### Split by regex

In [12]:
# fold_0..fold_2 are selected for training, fold_3 for validation.
# presumably the patterns are matched against the file paths inside
# split_by_regex — verify in dataset_spliter.
train_regex = re.compile(r'(fold_0|fold_1|fold_2)')
val_regex = re.compile(r'(fold_3)')

# Four collections come back: hem/all files for training, then hem/all files for validation.
hem_train, all_train, hem_val, all_val = split_handler.split_by_regex(train_regex, val_regex)

In [13]:
# Report the size of the split: overall totals first, then one line per
# class and subset.
n_hem_train, n_all_train = len(hem_train), len(all_train)
n_hem_val, n_all_val = len(hem_val), len(all_val)

print('Train Total: {0}'.format(n_hem_train + n_all_train))
print('Val Total: {0}'.format(n_hem_val + n_all_val))
print("")
print('Hem train: {}'.format(n_hem_train))
print('All train: {}'.format(n_all_train))
print('Hem val: {}'.format(n_hem_val))
print('All val: {}'.format(n_all_val))

Train Total: 10661
Val Total: 1867

Hem train: 3389
All train: 7272
Hem val: 648
All val: 1219


In [14]:
# The class label is the 'hem' or 'all' token directly in front of the
# '.bmp' extension. The dot is escaped so that only a literal '.bmp' suffix
# is accepted (an unescaped '.' would match any character).
pat = re.compile(r'^.*(hem|all)\.bmp$')


def get_label(fn):
    """Return the class label encoded at the end of an image file name.

    Args:
        fn: File name or path; it is converted with ``str`` before matching.

    Returns:
        ``'hem'`` or ``'all'``.

    Raises:
        ValueError: If the name does not end in ``hem.bmp`` or ``all.bmp``.
    """
    match = pat.search(str(fn))
    if match is None:
        # Fail with the offending name instead of an AttributeError on None.
        raise ValueError('Cannot derive label from file name: {}'.format(fn))
    return match.group(1)

### Use complete image

In [15]:
# Build one image list per subset by concatenating the hem and all files of the split.
train = ImageList(hem_train + all_train)  # optional: scale up classes
valid = ImageList(hem_val + all_val)

In [16]:
# Bundle the training and validation lists under `path`, then label every
# item by passing it through get_label.
item_list = ItemLists(path, train, valid)
lls = item_list.label_from_func(get_label)

#### Data augmentation

In [17]:
# Extra cutout augmentation. It is currently NOT applied: the `xtra_tfms`
# argument of get_transforms below is commented out.
xtra_tfms=[cutout(n_holes=5, length=0.2)]#squish(scale=0.66), 
# Only the two flip options are passed explicitly; the commented-out
# arguments are kept as a record of settings that can be re-enabled.
tfms = get_transforms(do_flip=True, 
                      flip_vert=True, 
                      #max_rotate=90,  
                      #max_lighting=0.15, 
                      #max_zoom=1.5, 
                      #max_warp=0.2,
                      #p_affine=0.75,
                      #p_lighting=0.75,  
                      #xtra_tfms=xtra_tfms,
                     )

#### Create dataset 

In [18]:
def get_data(bs, size):
    """Create the normalized data bunch for one batch size / image size.

    Reads the notebook-level `lls` (labelled item lists) and `tfms`
    (augmentations). Images are resized with ResizeMethod.PAD using zero
    padding, and `normalize()` is called without explicit statistics.

    Args:
        bs: Batch size.
        size: Target image size.

    Returns:
        The data bunch returned by `normalize()`.
    """
    bunch = ImageDataBunch.create_from_ll(
        lls,
        bs=bs,
        size=size,
        ds_tfms=tfms,
        resize_method=ResizeMethod.PAD,
        padding_mode='zeros',
    )
    # Alternative kept for reference: bunch.normalize((channel_mean, channel_std))
    return bunch.normalize()
    

In [19]:
# List the architecture names offered by the pretrainedmodels package;
# any of them can be passed as `model_name` to get_cadene_model.
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [20]:
def get_cadene_model(pretrained=True, model_name='xception'):
    """Instantiate an architecture from the ``pretrainedmodels`` package.

    Args:
        pretrained: If truthy, request the ImageNet weights; otherwise build
            the network without pretrained weights.
        model_name: Name of the factory looked up in
            ``pretrainedmodels.__dict__`` (see ``pretrainedmodels.model_names``).

    Returns:
        The model object created by the selected factory.

    Raises:
        KeyError: If ``model_name`` is not an attribute of ``pretrainedmodels``.
    """
    # The factories default to pretrained='imagenet', so leaving the argument
    # out still loads the weights. Passing None explicitly is what actually
    # gives an untrained network.
    weights = 'imagenet' if pretrained else None
    return pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained=weights)

In [21]:
size = 256
bs = 64
data = get_data(bs, size)

# The architecture is named once and reused for both the model factory and
# the experiment name, so the checkpoint file names always describe the
# network that is actually trained. Previously the name said
# "se_resnext50_32x4d" while get_cadene_model built its default, xception.
model_name = 'xception'
experiment_name = "baseline_{}".format(model_name)

# `cnn_learner` is the current name of the deprecated `create_cnn`.
# Options tried earlier and left disabled: loss_func=FocalLoss(num_classes=1),
# ps=0.75, wd=0.1.
learn = cnn_learner(data, partial(get_cadene_model, model_name=model_name),
                    cut=-2,
                    metrics=[error_rate, F1Weighted(), MCC()],
                    loss_func=LabelSmoothingCrossEntropy(),
                    # The checkpoint name is fixed here, including the current `size`.
                    callback_fns=[partial(SaveModelCallback,
                                          name='stage1-{}-{}'.format(experiment_name, size))],
                    )

  warn("`create_cnn` is deprecated and is now named `cnn_learner`.")
Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth" to /home/c.marzahl@de.eu.local/.torch/models/xception-43020ad28.pth
100%|██████████| 91675053/91675053 [00:01<00:00, 45946853.24it/s]


In [22]:
# Phase 1: call freeze() and run 5 one-cycle epochs at the base learning rate.
learn.freeze()
lr = 1e-2
learn.fit_one_cycle(5, lr)

# Phase 2: call unfreeze() and run 10 one-cycle epochs with a learning-rate
# slice from 1e-5 up to lr/5.
learn.unfreeze()
learn.fit_one_cycle(10, slice(1e-5,lr/5))

epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,0.620999,0.820352,0.320300,0.588439,0.203612,02:06
1,0.581140,1.036119,0.306909,0.646814,0.250288,02:05
2,0.546890,0.728534,0.310123,0.656216,0.249549,02:06
3,0.515418,0.672325,0.298340,0.681735,0.294544,02:06
4,0.498024,0.657104,0.291377,0.673994,0.300137,02:06


Better model found at epoch 0 with val_loss value: 0.8203524947166443.
Better model found at epoch 2 with val_loss value: 0.7285336256027222.
Better model found at epoch 3 with val_loss value: 0.6723250150680542.
Better model found at epoch 4 with val_loss value: 0.6571040153503418.


epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,0.496079,0.648587,0.291377,0.679547,0.303178,03:04
1,0.492143,0.651308,0.286020,0.685438,0.317755,03:03
2,0.482783,0.678395,0.289234,0.673071,0.305242,03:03
3,0.483907,0.656576,0.275308,0.680436,0.349047,03:03
4,0.471519,0.645113,0.279593,0.697557,0.338252,03:03
5,0.463317,0.635114,0.278522,0.690507,0.337103,03:02
6,0.455003,0.639009,0.268345,0.698705,0.365823,03:03
7,0.456566,0.646097,0.276915,0.694184,0.342088,03:02
8,0.452922,0.638488,0.268345,0.702529,0.365558,03:03
9,0.455683,0.636411,0.269416,0.697808,0.362700,03:03


Better model found at epoch 0 with val_loss value: 0.6485869884490967.
Better model found at epoch 4 with val_loss value: 0.6451134085655212.
Better model found at epoch 5 with val_loss value: 0.6351143717765808.


In [25]:
# Export the learner to 'NeuVersion.pkl'.
# NOTE(review): this cell carries execution count 25 although it sits before
# the cells numbered 23 and 24, so it was presumably run after the later
# training stages — confirm the intended position before a Restart & Run All.
learn.export('NeuVersion.pkl')  

In [23]:
# Continue training the same learner at a larger image size with a smaller batch size.
# NOTE(review): the SaveModelCallback name was fixed when `learn` was created
# and contains the size used at that time — confirm whether checkpoints of
# this stage are meant to share that file name.
size = 384
bs = 32
learn.data = get_data(bs, size)

# Phase 1: freeze() followed by 5 one-cycle epochs at the base learning rate.
learn.freeze()
lr = 1e-2
learn.fit_one_cycle(5, lr)

# Phase 2: unfreeze() followed by 10 one-cycle epochs with a slice from 1e-5 up to lr/5.
learn.unfreeze()
learn.fit_one_cycle(10, slice(1e-5,lr/5))

epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,0.500966,0.674683,0.299411,0.626869,0.284938,04:55
1,0.473199,0.598263,0.238886,0.723036,0.463528,04:49
2,0.444079,0.578668,0.215319,0.774866,0.505193,04:49
3,0.421835,0.572579,0.216926,0.764256,0.502888,04:49
4,0.411102,0.543771,0.189609,0.799719,0.568077,04:49


Better model found at epoch 0 with val_loss value: 0.6746834516525269.
Better model found at epoch 1 with val_loss value: 0.5982630848884583.
Better model found at epoch 2 with val_loss value: 0.5786679983139038.
Better model found at epoch 3 with val_loss value: 0.5725790858268738.
Better model found at epoch 4 with val_loss value: 0.5437712073326111.


epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,0.403261,0.551592,0.187467,0.800390,0.574711,07:09
1,0.407858,0.543757,0.185324,0.803400,0.579374,07:06
2,0.407417,0.579817,0.221210,0.756380,0.494502,07:06
3,0.405826,0.562218,0.198715,0.787690,0.546897,07:08
4,0.406101,0.559534,0.199250,0.784466,0.548254,07:09
5,0.399481,0.549608,0.190680,0.794580,0.569394,07:09
6,0.401980,0.548249,0.188538,0.800281,0.571117,07:09
7,0.397005,0.543221,0.178361,0.810993,0.596538,07:08
8,0.391432,0.548441,0.184788,0.803608,0.581067,07:09
9,0.395786,0.534917,0.175683,0.815824,0.601377,07:08


Better model found at epoch 0 with val_loss value: 0.5515920519828796.
Better model found at epoch 1 with val_loss value: 0.543756902217865.
Better model found at epoch 7 with val_loss value: 0.5432209372520447.
Better model found at epoch 9 with val_loss value: 0.5349165201187134.


In [24]:
# Final stage: same schedule again at size 450 with batch size 16.
# NOTE(review): the SaveModelCallback name was fixed when `learn` was created
# and contains the size used at that time — confirm whether checkpoints of
# this stage are meant to share that file name.
size = 450
bs = 16
learn.data = get_data(bs, size)

# Phase 1: freeze() followed by 5 one-cycle epochs at the base learning rate.
learn.freeze()
lr = 1e-2
learn.fit_one_cycle(5, lr)

# Phase 2: unfreeze() followed by 10 one-cycle epochs with a slice from 1e-5 up to lr/5.
# In the recorded output the validation loss did not improve after epoch 1 of this phase.
learn.unfreeze()
learn.fit_one_cycle(10, slice(1e-5,lr/5))

epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,0.426746,0.621214,0.246385,0.753615,0.456382,06:38
1,0.450715,0.578662,0.219604,0.759138,0.497798,06:17
2,0.437265,0.545300,0.194965,0.799101,0.555895,06:16
3,0.421880,0.569562,0.213712,0.761715,0.519570,06:16
4,0.413381,0.519376,0.171934,0.819574,0.610643,06:16


Better model found at epoch 0 with val_loss value: 0.6212136149406433.
Better model found at epoch 1 with val_loss value: 0.5786618590354919.
Better model found at epoch 2 with val_loss value: 0.5453004240989685.
Better model found at epoch 4 with val_loss value: 0.5193756222724915.


epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,0.398535,0.562051,0.196572,0.784496,0.559874,09:23
1,0.403705,0.535078,0.190145,0.796215,0.569383,09:20
2,0.400987,0.537676,0.176219,0.811580,0.604257,09:21
3,0.385849,0.547556,0.184253,0.805514,0.581232,09:20
4,0.403077,0.610102,0.229780,0.739590,0.480221,09:20
5,0.391471,0.547728,0.193894,0.788649,0.564838,09:20
6,0.388433,0.539917,0.185324,0.808374,0.578100,09:20
7,0.392572,0.542688,0.196572,0.793580,0.550554,09:20
8,0.388792,0.545777,0.196572,0.795447,0.550799,09:20
9,0.384841,0.561090,0.212641,0.770830,0.512611,09:20


Better model found at epoch 0 with val_loss value: 0.5620509386062622.
Better model found at epoch 1 with val_loss value: 0.5350775122642517.
