In [3]:
import os

import pandas as pd
from sklearn.model_selection import KFold 

In [4]:
from img_generator import build_train_test_df, DataGenerator2D
from model_utils import calculate_iou_holdout_set, jaccard_distance_loss, focal_loss

In [5]:
data_path_source_dir = os.path.join('ml4h_proj1_colon_cancer_ct', 'ml4h_proj1_colon_cancer_ct')

## Create dataframes in the format and with the information required by the generator

### Create datframes with paths and depth for the images

In [6]:
tr_df, x_ts_df = build_train_test_df(data_path_source_dir)

In [7]:
tr_df

Unnamed: 0_level_0,Unnamed: 1_level_0,x_tr_img_path,depth,y_tr_img_path
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
001,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
...,...,...,...,...
169,253,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,254,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,255,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,256,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...


In [8]:
x_ts_df

Unnamed: 0_level_0,Unnamed: 1_level_0,x_ts_img_path,depth
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1
171,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
171,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
171,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
171,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
171,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,119
...,...,...,...
219,132,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,137
219,133,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,137
219,134,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,137
219,135,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,137


### Create CV folds for `tr_df`

let's go for 3 folds

In [9]:
def generate_fold_dict(df_, n_folds=3, seed=123):
    
    img_num_idx_list = df_.index.levels[0]
    folder = KFold(n_splits=n_folds, random_state=seed, shuffle=True)
    df_fold_dict = dict()
    
    for i, (train_fold_i, holdout_i) in enumerate(folder.split(img_num_idx_list)):
        train_fold_i_idx = img_num_idx_list[train_fold_i]
        holdout_i_idx = img_num_idx_list[holdout_i]

        df_fold_dict[f'fold_{i}'] = {
            'train': df_.loc[pd.IndexSlice[train_fold_i_idx, :], :],
            'holdout': df_.loc[pd.IndexSlice[holdout_i_idx, :], :]
        }
        
    return df_fold_dict

In [10]:
tr_fold_df_dict =  generate_fold_dict(df_=tr_df, n_folds=3, seed=123)

In [11]:
tr_fold_df_dict['fold_0']['train']

Unnamed: 0_level_0,Unnamed: 1_level_0,x_tr_img_path,depth,y_tr_img_path
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
005,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
005,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
005,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
005,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
005,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,98,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
...,...,...,...,...
169,253,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,254,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,255,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
169,256,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,258,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...


In [12]:
tr_fold_df_dict['fold_0']['holdout']

Unnamed: 0_level_0,Unnamed: 1_level_0,x_tr_img_path,depth,y_tr_img_path
index,depth_i,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
001,0,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,1,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,2,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,3,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
001,4,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,60,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
...,...,...,...,...
159,90,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,95,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
159,91,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,95,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
159,92,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,95,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
159,93,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...,95,ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...


## Let's create a generator for the trainset

For the first fold

In [13]:
train_data_generator = DataGenerator2D(df=tr_fold_df_dict['fold_0']['train'], x_col='x_tr_img_path', y_col='y_tr_img_path', batch_size=16, num_classes=None, shuffle=True, 
                         resize_dim=(256, 256),
                         rotate_range=30, horizontal_flip=True, vertical_flip=True)

In [20]:
from keras_unet.models import custom_unet
from tensorflow.keras.optimizers import Adam, SGD
from keras_unet.metrics import iou, iou_thresholded
from keras_unet.losses import jaccard_distance

In [18]:
model = custom_unet(
    input_shape=(256, 256, 192),
    use_batch_norm=False,
    num_classes=1,
    filters=64,
    dropout=0.2,
    output_activation='sigmoid')

In [22]:
model.compile(
    optimizer=Adam(), 
    #optimizer=SGD(lr=0.01, momentum=0.99),
    loss=focal_loss(gamma=2., alpha=0.9),
    #loss=jaccard_distance,
    metrics=[iou, iou_thresholded]
)

In [15]:
!pip install pydot



In [16]:
!pip install pydotplus



In [17]:
import tensorflow as tf
tf.keras.utils.plot_model(
    model,
    to_file="model.png",
    show_shapes=False,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
)

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


In [18]:
from keras.utils.vis_utils import plot_model

In [19]:
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


In [34]:
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, 256, 256, 19 0                                            
__________________________________________________________________________________________________
conv2d_114 (Conv2D)             (None, 256, 256, 64) 110656      input_7[0][0]                    
__________________________________________________________________________________________________
spatial_dropout2d_30 (SpatialDr (None, 256, 256, 64) 0           conv2d_114[0][0]                 
__________________________________________________________________________________________________
conv2d_115 (Conv2D)             (None, 256, 256, 64) 36928       spatial_dropout2d_30[0][0]       
____________________________________________________________________________________________

In [23]:
model.fit(train_data_generator, epochs=1)



ValueError: in user code:

    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:788 run_step  **
        outputs = model.train_step(data)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:754 train_step
        y_pred = self(x, training=True)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:1012 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\keras\engine\functional.py:425 call
        inputs, training=training, mask=mask)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\keras\engine\functional.py:560 _run_internal_graph
        outputs = node.layer(*args, **kwargs)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    c:\users\juan diego\documents\ondemna\venv\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:239 assert_input_compatibility
        str(tuple(shape)))

    ValueError: Input 0 of layer conv2d_19 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (None, None, None)


In [52]:
holdout_df = tr_fold_df_dict['fold_0']['train']
holdout_data_generator = DataGenerator2D(df=holdout_df, x_col='x_tr_img_path', y_col='y_tr_img_path', batch_size=16, num_classes=None, shuffle=False,
                                         resize_dim=(256, 256),
                                         rotate_range=None, horizontal_flip=False, vertical_flip=False)

In [54]:
holdout_df.iloc[0,:]

x_tr_img_path    ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
depth                                                           98
y_tr_img_path    ml4h_proj1_colon_cancer_ct\ml4h_proj1_colon_ca...
Name: (005, 0), dtype: object