In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import csv
import os
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt
import random
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import KFold

In [3]:
import cv2
from fastai import *
from fastai.vision import *
from fastai.callbacks import *

In [4]:
from dataset_spliter import SplitByPatient
from metrics import *#F1Weighted, MCC
from losses import *
from data_loader import ImageItemListCell
from augmentation import cutout




In [5]:
path = Path('/data/Datasets/WhiteBloodCancer/train/')

In [6]:
fnames = get_image_files(path, recurse=True)
fnames[:5]

[PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H10_43_1_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H22_31_15_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H14_9_11_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H14_28_6_hem.bmp'),
 PosixPath('/data/Datasets/WhiteBloodCancer/train/fold_1/hem/UID_H10_189_1_hem.bmp')]

In [7]:
len(fnames)

10661

#### Data augmentation

In [8]:
cutout_fn = TfmLighting(cutout)
xtra_tfms=[cutout_fn(n_holes=5, length=0.1)] #squish(scale=0.66),
tfms = get_transforms(do_flip=True, 
                      flip_vert=True, 
                      max_rotate=90,  
                      #max_lighting=0.15, 
                      #max_zoom=2, 
                      #max_warp=0.2,
                      #p_affine=0.75,
                      #p_lighting=0.75,  
                      #xtra_tfms=xtra_tfms,
                     )

#### Create dataset 

In [9]:
test_path = Path('/data/Datasets/WhiteBloodCancer/test/')

pat = re.compile(r'^.*(hem|all).bmp$')
def get_label(fn):
    return pat.search(str(fn)).group(1)

def get_data(bs, size, train_index, test_index):

    lls = ImageItemList(fnames, path=path)\
        .split_by_idxs(train_index, test_index)\
        .label_from_func(get_label).add_test_folder('../test')
    data = ImageDataBunch.create_from_ll(lls, size=size, bs=bs, ds_tfms=tfms).normalize()
    
    return data

In [10]:
def create_save_point(learn: Learner):
    
    preds_test, y_test=learn.get_preds(ds_type=DatasetType.Valid)# 
    preds_test = np.argmax(torch.sigmoid(preds_test), axis=1)
    score = int(matthews_corrcoef(y_test, preds_test) * 10000)
    
    learn.export('{}-{}-{}.pkl'.format(score, size, experiment_name))  
    
    export_csv(learn, score)

In [11]:
def export_csv(learn: Learner, score):
    
    y_pred, y_test_tta = learn.TTA(ds_type=DatasetType.Test, beta=0.4, scale=1.3)
    y_pred = to_np(y_pred)
    
    filename = '{}-{}-{}'.format(score, size, experiment_name)
    with open(path/filename+".csv", mode='w') as submission:
        submission_writer = csv.writer(submission, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        for fn, y in zip(learn.data.test_dl.items, y_pred):
            submission_writer.writerow([fn.name, y[0], y[1]])
            
    submission = [0 for i in range(y_pred.shape[0])]
            
    threshold = 0.5 
    for fn, y in zip(learn.data.test_dl.items, y_pred[:,0]):
        index = int(fn.name.replace(".bmp",'')) - 1
        y = 1 if y > threshold else 0
        submission[index] = y

    with open(path/filename+'.predict', 'w') as f:
        for item in submission:
            f.write("{}\n".format(item))

In [12]:
val_ids_fold_0 = []
train_ids_fold_0 = []

for id, fn in enumerate(fnames):

    found = False
    for pt in ['3', '19', '74', '16', '36', 'H4', 'H6', 'H46']:
        if 'UID_{}_'.format(pt) in str(fn):
            val_ids_fold_0.append(id)
            found = True
            break
    if found: continue
    train_ids_fold_0.append(id)
            
val_ids_fold_1 = []
train_ids_fold_1 = []


for id, fn in enumerate(fnames):

    found = False
    for pt in ['15', '72', '1', '3','17', 'H7', 'H18', 'H1']:
        if 'UID_{}_'.format(pt) in str(fn):
            val_ids_fold_1.append(id)
            found = True
            break
    if found: continue
    train_ids_fold_1.append(id)
        

train = [train_ids_fold_0, train_ids_fold_1]
val = [val_ids_fold_0, val_ids_fold_1]

if len(val_ids_fold_0) + len(train_ids_fold_0) != len(fnames):
    raise Exception

In [13]:
#import torchvision.models as models

fold = 0
for train_index, test_index in zip(train, val):
    size = 256
    bs = 96

    data = get_data(bs, size, train_index, test_index)
    
    gc.collect()
    experiment_name = 'rn34-Longer-Custom-{}'.format(fold)
    
    learn = create_cnn(data, models.resnet34, 
                   metrics=[error_rate, F1Weighted(), MCC()],  
                   #loss_func=FocalLoss(num_classes=1),
                   #ps=0.75,
                   #wd=0.1,
                   callback_fns=[ShowGraph, partial(SaveModelCallback, name='stage1-{}-{}'.format(experiment_name, size))],
                  )#.to_fp16()

    lr = 1e-2
    learn.fit_one_cycle(5, lr)
    
    learn.unfreeze()
    learn.callback_fns[2] = partial(SaveModelCallback, 
                              monitor="mcc", 
                              mode='max', 
                              name='stage2-{}-{}'.format(experiment_name, size))
    learn.fit_one_cycle(10, slice(1e-5,lr/5))
    
    ####################################### 384
    
    size = 384
    bs = 64
    learn.freeze()
    
    learn.data = get_data(bs, size, train_index, test_index)
    learn.callback_fns[2] = partial(SaveModelCallback, 
                              monitor="mcc", 
                              mode='max', 
                              name='stage1-{}-{}'.format(experiment_name, size))
    learn.fit_one_cycle(5, slice(1e-5,lr/5))
    
    learn.unfreeze()
    learn.callback_fns[2] = partial(SaveModelCallback, 
                              monitor="mcc", 
                              mode='max', 
                              name='stage2-{}-{}'.format(experiment_name, size))
    learn.fit_one_cycle(10, slice(1e-5,lr/5))
    
    create_save_point(learn)
    ####################################### 450
    

    size = 450
    bs = 32
    learn.freeze()
    
    learn.data = get_data(bs, size, train_index, test_index)
    learn.callback_fns[2] = partial(SaveModelCallback, 
                              monitor="mcc", 
                              mode='max', 
                              name='stage1-{}-{}'.format(experiment_name, size))
    learn.fit_one_cycle(5, slice(1e-5,lr/5))
    
    learn.unfreeze()
    learn.callback_fns[2] = partial(SaveModelCallback, 
                              monitor="mcc", 
                              mode='max', 
                              name='stage2-{}-{}'.format(experiment_name, size))
    learn.fit_one_cycle(10, slice(1e-5,lr/5))
    
    create_save_point(learn)

    fold += 1

TypeError: unsupported operand type(s) for +: 'PosixPath' and 'str'