In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt
import random

In [3]:
import cv2
from fastai import *
from fastai.vision import *
from fastai.callbacks import *

In [4]:
from dataset_spliter import SplitByPatient
from metrics import *#F1Weighted, MCC
from losses import *

In [5]:
path = Path('/data/Datasets/WhiteBloodCancer/train/')

In [6]:
np.random.seed(42)

In [7]:
fnames = get_image_files(path, recurse=True)
fnames[:5]

[PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H10_43_1_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H22_31_15_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H14_9_11_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H14_28_6_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H10_189_1_hem.bmp')]

In [8]:
hem_regex = re.compile(r'UID_(H[0-9]+)_', re.IGNORECASE)
all_regex = re.compile(r'UID_([0-9]+)_', re.IGNORECASE)

In [9]:
hem_patient_ids = list(set([hem_regex.search(str(fn)).group(1)
                            for fn in fnames if hem_regex.search(str(fn)) is not None]))
all_patint_ids = list(set([all_regex.search(str(fn)).group(1)
                           for fn in fnames if all_regex.search(str(fn)) is not None]))

hem_patients = dict((k,[]) for k in hem_patient_ids)
all_patints = dict((k,[]) for k in all_patint_ids)

[all_patints[key].append(fn) for key in all_patints.keys() for fn in fnames if 'UID_{0}_'.format(key) in str(fn)]
[hem_patients[key].append(fn) for key in hem_patients.keys() for fn in fnames if 'UID_{0}_'.format(key) in str(fn)]
print()




## Split data into train val 

In [10]:
split_handler = SplitByPatient(hem_patients, all_patints)

### Split by regex

In [11]:
train_regex = re.compile(r'(fold_0|fold_1|fold_2)')
val_regex = re.compile(r'(fold_3)')

hem_train, all_train, hem_val, all_val = split_handler.split_by_regex(train_regex, val_regex)

In [12]:
print('Train Total: {0}'.format(len(hem_train)+len(all_train)))
print('Val Total: {0}'.format(len(hem_val)+len(all_val)))
print("")
print('Hem train: {}'.format(len(hem_train)))
print('All train: {}'.format(len(all_train)))
print('Hem val: {}'.format(len(hem_val)))
print('All val: {}'.format(len(all_val)))

Train Total: 10661
Val Total: 1867

Hem train: 3389
All train: 7272
Hem val: 648
All val: 1219


In [13]:
pat = re.compile(r'^.*(hem|all).bmp$')

def get_label(fn):
    return pat.search(str(fn)).group(1)

### Use complete image

In [14]:
train = ImageList(hem_train + all_train) #optinal scale up classes 
valid = ImageList(hem_val + all_val)

In [15]:
item_list = ItemLists(path, train, valid)
lls = item_list.label_from_func(get_label)

#### Data augmentation

In [16]:
xtra_tfms=[cutout(n_holes=5, length=0.2)]#squish(scale=0.66), 
tfms = get_transforms(do_flip=True, 
                      flip_vert=True, 
                      max_rotate=90,  
                      #max_lighting=0.15, 
                      #max_zoom=1.5, 
                      #max_warp=0.2,
                      #p_affine=0.75,
                      #p_lighting=0.75,  
                      #xtra_tfms=xtra_tfms,
                     )

#### Create dataset 

In [17]:
def get_data(bs, size):
    data  = ImageDataBunch.create_from_ll(lls, size=size, bs=bs, 
                                      ds_tfms=tfms, padding_mode='zeros',
                                      resize_method=ResizeMethod.PAD)
    data = data.normalize()
    #data = data.normalize((channel_mean, channel_std))
    return data
    

In [20]:
size = 256
bs = 96
data = get_data(bs, size)

experiment_name = "baseline_xresnet18"
learn = Learner(data, models.xresnet18(), 
                       metrics=[error_rate, F1Weighted(), MCC()], #  
                       #loss_func=FocalLoss(num_classes=1),
                       #ps=0.75,
                       #wd=0.1,
                       loss_func = LabelSmoothingCrossEntropy(),
                       callback_fns=[partial(SaveModelCallback, name='stage1-{}-{}'.format(experiment_name, size))],

                  )#

In [23]:
#learn.freeze()
lr = 1e-2
learn.fit_one_cycle(5, lr)

#learn.unfreeze()
learn.fit_one_cycle(10, lr/5)

epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,1.363156,1.988194,0.347081,0.515819,0.000000,00:55
1,1.375123,1.854743,0.423674,0.578546,0.075622,00:56
2,1.355604,1.845021,0.313873,0.618050,0.222925,00:56
3,1.310464,1.686976,0.296197,0.640307,0.288308,00:57
4,1.272097,1.679065,0.285485,0.669202,0.316721,00:56


  'precision', 'predicted', average, warn_for)
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Better model found at epoch 0 with val_loss value: 1.9881939888000488.
Better model found at epoch 1 with val_loss value: 1.8547425270080566.
Better model found at epoch 2 with val_loss value: 1.8450214862823486.
Better model found at epoch 3 with val_loss value: 1.6869760751724243.
Better model found at epoch 4 with val_loss value: 1.679065227508545.


epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,1.263184,1.691911,0.295126,0.658821,0.286440,00:56
1,1.266327,1.667933,0.327263,0.670647,0.269428,00:56
2,1.275952,1.577885,0.283878,0.707482,0.347320,00:56
3,1.275380,1.571581,0.270487,0.713367,0.366903,00:56
4,1.263966,1.600937,0.281200,0.701362,0.339419,00:57
5,1.251676,1.563587,0.253348,0.720730,0.406787,00:56
6,1.247731,1.708605,0.289234,0.644538,0.319151,00:56
7,1.231475,1.650438,0.271559,0.698966,0.356635,00:56
8,1.229759,1.648982,0.275844,0.685367,0.344952,00:56
9,1.224307,1.658741,0.272630,0.689370,0.354474,00:56


Better model found at epoch 0 with val_loss value: 1.6919114589691162.
Better model found at epoch 1 with val_loss value: 1.6679327487945557.
Better model found at epoch 2 with val_loss value: 1.5778849124908447.
Better model found at epoch 3 with val_loss value: 1.571581482887268.
Better model found at epoch 5 with val_loss value: 1.5635871887207031.


In [25]:
size = 384
bs = 64
learn.data = get_data(bs, size)

lr = 1e-2
learn.fit_one_cycle(5, lr)


learn.fit_one_cycle(10, lr/5)

epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,1.268357,1.537939,0.280129,0.705837,0.346436,01:56
1,1.302466,1.641340,0.295661,0.711148,0.408340,01:58
2,1.291312,1.728246,0.338511,0.536138,0.123812,01:57
3,1.249037,1.930411,0.321907,0.576069,0.207767,01:58
4,1.218393,1.550064,0.248527,0.727415,0.419615,01:57


Better model found at epoch 0 with val_loss value: 1.5379390716552734.


epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time
0,1.251987,1.595708,0.266738,0.697545,0.371408,01:58
1,1.238169,1.672110,0.257097,0.700872,0.407509,01:58
2,1.235609,1.589297,0.233530,0.736584,0.467387,01:58
3,1.236348,1.549270,0.299411,0.707761,0.417782,01:58
4,1.216320,1.498075,0.213712,0.771667,0.508813,01:58
5,1.218321,1.639978,0.257097,0.693375,0.419912,01:57
6,1.200373,1.592383,0.242100,0.726634,0.442775,01:57
7,1.191230,1.651222,0.251205,0.710590,0.422219,01:58
8,1.188703,1.585802,0.236743,0.732122,0.459129,01:58
9,1.185622,1.616802,0.244242,0.721564,0.439570,01:58


Better model found at epoch 0 with val_loss value: 1.5957081317901611.
Better model found at epoch 2 with val_loss value: 1.5892970561981201.
Better model found at epoch 3 with val_loss value: 1.5492702722549438.
Better model found at epoch 4 with val_loss value: 1.498075246810913.


In [27]:
size = 450
bs = 32
learn.data = get_data(bs, size)

lr = 1e-2
learn.fit_one_cycle(5, lr)

learn.fit_one_cycle(10, lr/5)

epoch,train_loss,valid_loss,error_rate,f1_weighted,mcc,time


RuntimeError: The size of tensor a (57) must match the size of tensor b (56) at non-singleton dimension 3

In [None]:
interp.plot_top_losses(9, figsize=(15,11))

In [None]:
interp.plot_confusion_matrix(figsize=(16,16), dpi=60)

In [None]:
int(interp.confusion_matrix().flatten()[[1, 2]].sum())