In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

from __future__ import division

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

import six
import numpy as np
import pandas as pd
import cv2
import glob
import random

# Seed both NumPy's and Python's global RNGs so that the random shuffles
# performed later in the notebook start from a fixed state.
np.random.seed(2016)
random.seed(2016)

from keras.models import Model
from keras.layers import Input, Activation, merge, Dense, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Any results you write to the current directory are saved as output.

In [None]:
import gc
import sys
 
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import *
from keras import applications as apps
from keras import optimizers as opt
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import to_categorical
from keras.callbacks import LearningRateScheduler

## Define Useful Functions

In [None]:
def setup_to_transfer_learn(model, base_model) -> 'model':
    """Freeze the pretrained base and compile ``model`` for transfer learning.

    Args:
        model: Full model to compile; it is compiled in place.
        base_model: Model whose layers are all marked non-trainable.

    Returns:
        The same ``model`` object, compiled with an Adam optimizer whose
        learning rate is ``conf['learnr']``, categorical cross-entropy loss
        and an accuracy metric.
    """
    for frozen_layer in base_model.layers:
        frozen_layer.trainable = False

    optimizer = Adam(lr=conf['learnr'])
    model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
 

In [None]:
def setup_to_finetune(model, n) -> 'model':
    """Prepare ``model`` for fine-tuning and recompile it.

    Layers with index ``0..n`` (inclusive) are frozen; every layer with an
    index greater than ``n`` is made trainable.

    Args:
        model: Model whose ``layers`` are frozen/unfrozen; compiled in place.
        n: Index of the last layer that stays frozen.

    Returns:
        The same ``model`` object, compiled with SGD (lr=0.0001,
        momentum=0.9), categorical cross-entropy loss and an accuracy metric.
    """
    for i, layer in enumerate(model.layers):
        layer.trainable = i > n

    # Momentum is a hyper-parameter of the optimizer, so it must be given to
    # SGD itself rather than to compile().
    model.compile(
        optimizer=opt.SGD(lr=0.0001, momentum=0.9),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [None]:
def batch_generator_train(files, batch_size):
    """Endlessly yield ``(images, one_hot_labels)`` batches read from disk.

    ``files`` is shuffled in place before the first batch and again after
    every complete pass. Each image is read with OpenCV and resized to
    ``conf['image_shape']``. The label comes from the last character of the
    image's parent directory name (``.../Type_1/x.jpg`` -> ``'1'``):
    ``'1'`` -> ``[1, 0, 0]``, ``'2'`` -> ``[0, 1, 0]``, anything else ->
    ``[0, 0, 1]``.

    Args:
        files: Mutable sequence of image paths (shuffled in place).
        batch_size: Maximum number of images per yielded batch; the last
            batch of a pass may be smaller.

    Yields:
        Tuple ``(images, labels)`` of NumPy arrays for one batch.

    Raises:
        ValueError: On first iteration, if ``files`` is empty or
            ``batch_size`` is not positive (the original looped forever
            yielding empty batches in the empty case).
        IOError: If an image cannot be read.
    """
    if len(files) == 0:
        raise ValueError('batch_generator_train needs at least one file')
    if batch_size <= 0:
        raise ValueError('batch_size must be positive, got {}'.format(batch_size))

    number_of_batches = int(np.ceil(len(files) / batch_size))
    counter = 0
    np.random.shuffle(files)  # shuffles the caller's list in place
    while True:
        batch_files = files[batch_size * counter:batch_size * (counter + 1)]
        image_list = []
        mask_list = []
        for f in batch_files:
            image = cv2.imread(f)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError('Could not read image: {}'.format(f))
            image = cv2.resize(image, conf['image_shape'])

            # Take the class from the 'Type_N' directory instead of a fixed
            # character offset, so the label survives a change of data root.
            cancer_type = os.path.basename(os.path.dirname(f))[-1:]
            if cancer_type == '1':
                mask = [1, 0, 0]
            elif cancer_type == '2':
                mask = [0, 1, 0]
            else:
                mask = [0, 0, 1]

            image_list.append(image)
            mask_list.append(mask)
        counter += 1

        yield np.array(image_list), np.array(mask_list)

        if counter == number_of_batches:
            # A full pass is done: reshuffle and start over.
            np.random.shuffle(files)
            counter = 0

In [None]:
def entire_generator(files):
    """Load every image in ``files`` into memory and return arrays.

    Despite its name this is not a generator: it returns once, after all
    files have been read. ``files`` is shuffled in place first, so the
    returned rows follow the shuffled order. Each image is read with OpenCV
    and resized to ``conf['image_shape']``. The label comes from the last
    character of the image's parent directory name
    (``.../Type_1/x.jpg`` -> ``'1'``): ``'1'`` -> ``[1, 0, 0]``,
    ``'2'`` -> ``[0, 1, 0]``, anything else -> ``[0, 0, 1]``.

    Args:
        files: Mutable sequence of image paths (shuffled in place).

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one row per file.

    Raises:
        IOError: If an image cannot be read.
    """
    image_list = []
    mask_list = []

    np.random.shuffle(files)  # shuffles the caller's list in place

    for f in files:
        image = cv2.imread(f)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('Could not read image: {}'.format(f))
        image = cv2.resize(image, conf['image_shape'])

        # Take the class from the 'Type_N' directory instead of a fixed
        # character offset, so the label survives a change of data root.
        cancer_type = os.path.basename(os.path.dirname(f))[-1:]
        if cancer_type == '1':
            mask = [1, 0, 0]
        elif cancer_type == '2':
            mask = [0, 1, 0]
        else:
            mask = [0, 0, 1]

        image_list.append(image)
        mask_list.append(mask)

    return np.array(image_list), np.array(mask_list)

## Main Code

### Set Parameters

In [None]:
# Central configuration shared by the cells and helper functions of this notebook.
conf = {
    # Fraction of the data intended for training (the remainder is for
    # validation). Range: (0; 1)
    'train_valid_fraction': 0.75,

    # Batch size for the CNN [depends on GPU and memory available]
    'batch_size': 32,

    # Number of epochs for CNN training (set to 1 for a quick run)
    'nb_epoch': 25,

    # Early-stopping patience: epochs without improvement on validation
    'patience': 3,

    # Size every image is resized to before it reaches the CNN. Larger is
    # better, but the CNN has to grow as well.
    # Alternative sizes: (4160, 4128), (2080, 2064), (1024, 1024)
    'image_shape': (64, 64),

    # Initial learning rate
    'learnr': 0.005,
}

# Titles (and output file stems) of the accuracy and loss plots.
acc_title = 'Accuracy_ResNet50_FreezeAll_WthDecay'
loss_title = 'MultiClass_CrossEntropyLoss_ResNet50_FreezeAll_WthDecay'

# Augmenting image generator used for the training data.
# For a run without augmentation use instead: imgGen = ImageDataGenerator()
imgGen = ImageDataGenerator(
    # Normalization based on dataset-wide statistics
    featurewise_center=True, featurewise_std_normalization=True,
    # Random geometric perturbations
    rotation_range=20, shear_range=0.2, zoom_range=0.2,
    width_shift_range=0.2, height_shift_range=0.2,
    horizontal_flip=True,
)

### Load and Split Data into Training and Validation

In [None]:
import glob
import numpy as np

# One directory per class; the directory name carries the label (Type_1..3).
filepaths = [
    '../input/intel-mobileodt-cervical-cancer-screening/train/Type_1/',
    '../input/intel-mobileodt-cervical-cancer-screening/train/Type_2/',
    '../input/intel-mobileodt-cervical-cancer-screening/train/Type_3/',
]

allFiles = []
for filepath in filepaths:
    # glob returns files in a filesystem-dependent order; sorting makes the
    # seeded shuffle below produce the same split on every run.
    allFiles.extend(sorted(glob.glob(filepath + '*.jpg')))

if not allFiles:
    raise FileNotFoundError(
        'No .jpg files found under: {}'.format(', '.join(filepaths)))

# Use the configured fraction rather than a second hard-coded copy of it.
train_fraction = conf['train_valid_fraction']

split_point = int(round(train_fraction * len(allFiles)))

np.random.shuffle(allFiles)
train_list = allFiles[:split_point]
valid_list = allFiles[split_point:]
print('Train patients: {}'.format(len(train_list)))
print('Validation patients: {}'.format(len(valid_list)))

### Generate entire list

In [None]:
# Read and resize every training image into one in-memory array, together
# with its one-hot labels. Note: entire_generator shuffles train_list in place.
train_data, train_labels = entire_generator(train_list)

### Create Model

In [None]:
# --- Constants -------------------------------------------------------------
num_classes = 3
batch_size = conf['batch_size']
filename_model_json = 'model.json'
filename_model_weights = '../input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
# NOTE(review): these two step counts are not referenced by any other cell
# visible in this notebook - confirm whether they are still needed.
steps_train = 30938 / 256
steps_test = 3596 / 256

# --- Pretrained backbone ---------------------------------------------------
# ResNet50 without its classification head, loaded from a local weights file
# and ending in global average pooling.
base_model = apps.resnet50.ResNet50(
    include_top=False,
    weights=filename_model_weights,
    pooling='avg',
)
# NOTE(review): kept for reference; no visible cell applies it to the images.
ppf = apps.resnet50.preprocess_input

# --- Classification head ---------------------------------------------------
# A single softmax layer with one unit per class on top of the pooled features.
x = base_model.output
pred = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=pred)


In [None]:
# Freeze every layer of the pretrained base and compile the model, so only
# the layers added on top of base_model remain trainable.
model = setup_to_transfer_learn(model, base_model)

In [None]:
# Print the layer-by-layer summary of the compiled model.
from keras import utils 
utils.print_summary(model)

In [None]:
import math
def step_decay(epoch):
    """Step-wise learning-rate schedule.

    The rate starts at ``conf['learnr']`` and is multiplied by 0.5 each time
    ``epoch + 1`` reaches another multiple of 10, i.e.
    ``conf['learnr'] * 0.5 ** floor((epoch + 1) / 10)``.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The learning rate to use for ``epoch``.
    """
    completed_drops = math.floor((epoch + 1) / 10.0)
    return conf['learnr'] * math.pow(0.5, completed_drops)

In [None]:
print('Fit model...')

# Compute the dataset-wide mean/std required by the featurewise_* options.
imgGen.fit(train_data)
generator = imgGen.flow(train_data, train_labels, conf['batch_size'])


def _standardized_validation_batches(files, batch_size):
    """Yield validation batches normalized like the training batches.

    Only the generator's normalization is applied (no random augmentation),
    so training and validation inputs are on the same scale.
    """
    for images, labels in batch_generator_train(files, batch_size=batch_size):
        # standardize() works in place and needs a float array.
        yield imgGen.standardize(images.astype('float32')), labels


history = model.fit_generator(
    generator,
    # len(generator) is already the number of batches per epoch; dividing it
    # by the batch size again would show the model only a fraction of the data.
    steps_per_epoch=len(generator),
    epochs=conf['nb_epoch'],
    validation_data=_standardized_validation_batches(valid_list, conf['batch_size']),
    # Whole number of batches needed to cover the validation list once.
    validation_steps=int(np.ceil(len(valid_list) / conf['batch_size'])),
    callbacks=[LearningRateScheduler(step_decay)],
)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch.
plt.figure(figsize=(20,10))
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title(acc_title)
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
# Save before show(): once show() returns the figure is no longer the
# current figure, so a later savefig() would write an empty image.
plt.savefig(acc_title + '.jpg')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
plt.figure(figsize=(20,10))
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title(loss_title)
plt.ylabel('Multiclass_crossentropy_loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
# Save before show(): once show() returns the figure is no longer the
# current figure, so a later savefig() would write an empty image.
plt.savefig(loss_title + '.jpg')
plt.show()