In [1]:
import os

import pandas as pd
from sklearn.model_selection import KFold 

In [2]:
from img_generator import build_train_test_df, DataGenerator2D
from model_utils import calculate_iou_holdout_set, jaccard_distance_loss, focal_loss

In [3]:
data_path_source_dir = os.path.join('ml4h_proj1_colon_cancer_ct', 'ml4h_proj1_colon_cancer_ct')

## Create dataframes in the format and with the information required by the generator

### Create datframes with paths and depth for the images

In [4]:
tr_df, x_ts_df = build_train_test_df(data_path_source_dir)

In [5]:
tr_df

Unnamed: 0_level_0,Unnamed: 1_level_0,x_tr_img_path,depth,y_tr_img_path
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
001,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
...,...,...,...,...
169,253,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,254,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,255,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,256,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...


In [6]:
x_ts_df

Unnamed: 0_level_0,Unnamed: 1_level_0,x_ts_img_path,depth
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1
171,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
171,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
171,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
171,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
171,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
...,...,...,...
219,132,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,137
219,133,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,137
219,134,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,137
219,135,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,137


### Create CV folds for `tr_df`

let's go for 3 folds

In [7]:
def generate_fold_dict(df_, n_folds=3, seed=123):
    
    img_num_idx_list = df_.index.levels[0]
    folder = KFold(n_splits=n_folds, random_state=seed, shuffle=True)
    df_fold_dict = dict()
    
    for i, (train_fold_i, holdout_i) in enumerate(folder.split(img_num_idx_list)):
        train_fold_i_idx = img_num_idx_list[train_fold_i]
        holdout_i_idx = img_num_idx_list[holdout_i]

        df_fold_dict[f'fold_{i}'] = {
            'train': df_.loc[pd.IndexSlice[train_fold_i_idx, :], :],
            'holdout': df_.loc[pd.IndexSlice[holdout_i_idx, :], :]
        }
        
    return df_fold_dict

In [8]:
tr_fold_df_dict =  generate_fold_dict(df_=tr_df, n_folds=3, seed=123)

In [9]:
tr_fold_df_dict['fold_0']['train']

Unnamed: 0_level_0,Unnamed: 1_level_0,x_tr_img_path,depth,y_tr_img_path
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
005,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
005,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
005,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
005,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
005,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
...,...,...,...,...
169,253,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,254,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,255,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,256,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...


In [10]:
tr_fold_df_dict['fold_0']['holdout']

Unnamed: 0_level_0,Unnamed: 1_level_0,x_tr_img_path,depth,y_tr_img_path
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
001,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
...,...,...,...,...
159,90,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,95,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
159,91,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,95,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
159,92,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,95,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
159,93,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,95,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...


## Let's create a generator for the trainset

For the first fold

In [11]:
train_data_generator = DataGenerator2D(df=tr_fold_df_dict['fold_0']['train'], x_col='x_tr_img_path', y_col='y_tr_img_path', batch_size=16, num_classes=None, shuffle=True, 
                         resize_dim=(256, 256),
                         rotate_range=30, horizontal_flip=True, vertical_flip=True)

In [12]:
from keras_unet.models import custom_unet
from tensorflow.keras.optimizers import Adam, SGD
from keras_unet.metrics import iou, iou_thresholded
from keras_unet.losses import jaccard_distance

-----------------------------------------
keras-unet init: TF version is >= 2.0.0 - using `tf.keras` instead of `Keras`
-----------------------------------------


In [13]:
model = custom_unet(
    input_shape=(256, 256, 1),
    use_batch_norm=False,
    num_classes=1,
    filters=64,
    dropout=0.2,
    output_activation='sigmoid')

In [14]:
model.compile(
    optimizer=Adam(), 
    #optimizer=SGD(lr=0.01, momentum=0.99),
    loss=focal_loss(gamma=2., alpha=0.9),
    #loss=jaccard_distance,
    metrics=[iou, iou_thresholded]
)

In [15]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 1) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 256, 256, 64) 640         input_1[0][0]                    
__________________________________________________________________________________________________
spatial_dropout2d (SpatialDropo (None, 256, 256, 64) 0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 256, 256, 64) 36928       spatial_dropout2d[0][0]          
______________________________________________________________________________________________

In [None]:
model.fit(train_data_generator, epochs=1)

In [52]:
holdout_df = tr_fold_df_dict['fold_0']['train']
holdout_data_generator = DataGenerator2D(df=holdout_df, x_col='x_tr_img_path', y_col='y_tr_img_path', batch_size=16, num_classes=None, shuffle=False,
                                         resize_dim=(256, 256),
                                         rotate_range=None, horizontal_flip=False, vertical_flip=False)

In [54]:
holdout_df.iloc[0,:]

x_tr_img_path    ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
depth                                                           98
y_tr_img_path    ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
Name: (005, 0), dtype: object