In [1]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.optimizers import SGD, RMSprop
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input
from keras import optimizers
from keras import regularizers
from keras.preprocessing import image
from keras.models import Model
from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.models import model_from_json
from keras.callbacks import TensorBoard
from keras.utils.np_utils import to_categorical

import csv
import numpy as np
import PIL

Using TensorFlow backend.


# Setting up the training and validation sets

In [3]:
%pwd

u'/home/paperspace/Cervical_Cancer_Comp'

In [4]:
import os, sys

# Anchor every path used by the notebook at the directory it was launched from.
LESSON_HOME_DIR = os.getcwd()
current_dir = LESSON_HOME_DIR
# Keeps a trailing slash: later cells append sub-directory names to this string.
DATA_HOME_DIR = '{}/data/'.format(current_dir)

In [5]:
import matplotlib
%matplotlib inline

# Create validation set 

In [31]:
%cd $DATA_HOME_DIR

/home/paperspace/Cervical_Cancer_Comp/data


In [32]:
%mkdir valid
%cd valid
%mkdir Type_1
%mkdir Type_2
%mkdir Type_3

/home/paperspace/Cervical_Cancer_Comp/data/valid


In [33]:
%cd $DATA_HOME_DIR/train

/home/paperspace/Cervical_Cancer_Comp/data/train


In [26]:
from glob import glob

In [34]:
%pwd

u'/home/paperspace/Cervical_Cancer_Comp/data/train'

In [35]:
# Hold out a random subset of the training images as the validation set.
# Relies on the current working directory being the training folder (set by the
# %cd cell above) and on valid/Type_* already existing.
# NOTE: this cell moves files on disk, so re-running it moves a further batch.
N_VALID = 355    # number of images to hold out
SPLIT_SEED = 0   # fixed seed so the same split can be regenerated

# Paths come back relative to the training folder, e.g. 'Type_2/777.jpg'.
# Sorting first removes the dependence on filesystem listing order.
g = sorted(glob('**/*.jpg'))
shuf = np.random.RandomState(SPLIT_SEED).permutation(g)
print(shuf)

valid_root = os.path.join(DATA_HOME_DIR, 'valid')
# min() guards against an IndexError when fewer than N_VALID images are available
for i in range(min(N_VALID, len(shuf))):
    os.rename(shuf[i], os.path.join(valid_root, shuf[i]))

['Type_2/777.jpg' 'Type_2/366.jpg' 'Type_1/481.jpg' ..., 'Type_2/705.jpg'
 'Type_2/451.jpg' 'Type_2/297.jpg']


## Sanity Check

In [36]:
%cd $DATA_HOME_DIR/valid

/home/paperspace/Cervical_Cancer_Comp/data/valid


In [41]:
%pwd

u'/home/paperspace/Cervical_Cancer_Comp/data/valid/Type_1'

In [42]:
%ls -1 | wc -l

65


In [43]:
%cd $DATA_HOME_DIR/valid/Type_2

/home/paperspace/Cervical_Cancer_Comp/data/valid/Type_2


In [44]:
%ls -1 | wc -l

186


In [45]:
%cd $DATA_HOME_DIR/valid/Type_2

/home/paperspace/Cervical_Cancer_Comp/data/valid/Type_2


In [46]:
%cd $DATA_HOME_DIR/valid/Type_3

/home/paperspace/Cervical_Cancer_Comp/data/valid/Type_3


In [47]:
%ls -1 | wc -l

104


# Building the Model

In [6]:
# Directories handed to flow_from_directory (one sub-folder per class).
# os.path.join avoids the doubled slash that DATA_HOME_DIR + '/valid' produced,
# since DATA_HOME_DIR already ends with '/'.
validation_data_dir = os.path.join(DATA_HOME_DIR, 'valid')
train_data_dir = os.path.join(DATA_HOME_DIR, 'train')

In [49]:
# InceptionV3 with ImageNet weights and without its original classification top
base_model = InceptionV3(include_top=False, weights='imagenet')

In [50]:
# New classification head stacked on the convolutional base:
# global average pooling -> two (Dense + Dropout) stages -> 3-way softmax.
x = GlobalAveragePooling2D()(base_model.output)
for units in (1024, 256):
    # fully-connected stage followed by 50% dropout
    x = Dense(units, activation='relu', kernel_initializer='glorot_uniform')(x)
    x = Dropout(0.5)(x)
# output layer: one probability per class (Type_1, Type_2, Type_3)
predictions = Dense(3, activation='softmax', kernel_initializer='glorot_uniform')(x)

In [51]:
# Full network to be trained: InceptionV3 input through to the new softmax output
model = Model(outputs=predictions, inputs=base_model.input)

In [7]:
def printAllLayers(net=None):
    """Print the index and name of every layer of a model.

    net: object exposing a `.layers` sequence whose items have a `.name`.
         Defaults to the notebook-level `model`, so existing calls without
         arguments behave as before.
    """
    if net is None:
        net = model  # notebook-level model, looked up at call time
    for i, layer in enumerate(net.layers):
        print(i, layer.name)

In [8]:
def printTrainableLayers(net=None):
    """Print the index and name of every layer whose `trainable` flag is set.

    net: object exposing a `.layers` sequence whose items have `.name` and
         `.trainable`. Defaults to the notebook-level `model`, so existing
         calls without arguments behave as before.
    """
    if net is None:
        net = model  # notebook-level model, looked up at call time
    for i, layer in enumerate(net.layers):
        if layer.trainable:
            print(i, layer.name)

In [54]:
printAllLayers()

(0, 'input_2')
(1, 'conv2d_95')
(2, 'batch_normalization_95')
(3, 'activation_95')
(4, 'conv2d_96')
(5, 'batch_normalization_96')
(6, 'activation_96')
(7, 'conv2d_97')
(8, 'batch_normalization_97')
(9, 'activation_97')
(10, 'max_pooling2d_5')
(11, 'conv2d_98')
(12, 'batch_normalization_98')
(13, 'activation_98')
(14, 'conv2d_99')
(15, 'batch_normalization_99')
(16, 'activation_99')
(17, 'max_pooling2d_6')
(18, 'conv2d_103')
(19, 'batch_normalization_103')
(20, 'activation_103')
(21, 'conv2d_101')
(22, 'conv2d_104')
(23, 'batch_normalization_101')
(24, 'batch_normalization_104')
(25, 'activation_101')
(26, 'activation_104')
(27, 'average_pooling2d_10')
(28, 'conv2d_100')
(29, 'conv2d_102')
(30, 'conv2d_105')
(31, 'conv2d_106')
(32, 'batch_normalization_100')
(33, 'batch_normalization_102')
(34, 'batch_normalization_105')
(35, 'batch_normalization_106')
(36, 'activation_100')
(37, 'activation_102')
(38, 'activation_105')
(39, 'activation_106')
(40, 'mixed0')
(41, 'conv2d_110')
(42, 'batc

In [55]:
# Freeze every layer of the pretrained InceptionV3 base so that only the newly
# added head is updated during training. Iterating over base_model.layers
# replaces the hard-coded slice model.layers[:311], which was intended to cover
# the same layers but was described by a stale "first 25 layers" comment.
# The model must be compiled after changing `trainable` (done in a later cell).
for layer in base_model.layers:
    layer.trainable = False

In [None]:
printTrainableLayers()

In [57]:
# Optimizer for training the new head: RMSprop with a small learning rate.
opt = RMSprop(lr=1e-5, rho=0.9, decay=0.01)
# Alternative that was tried:
#opt = SGD(lr=1e-2, momentum=0.9, decay=0.01, nesterov=True)
# Pass the configured instance. The string 'rmsprop' builds a default RMSprop
# and silently ignores the learning rate / decay chosen above.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [58]:
# Checkpoint file name pattern; the epoch number and validation loss are
# substituted into the placeholders when a checkpoint is written.
filepath="inception-top-weights-droput-50-2-improvement-{epoch:02d}-{val_loss:}.hdf5"

# Save weights only when the monitored validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1)

# Log the graph and images for TensorBoard.
tbCallBack = TensorBoard(
    log_dir='./graph_incep_top_dropout_50_2',
    histogram_freq=0,
    write_graph=True,
    write_images=True)

callbacks_list = [checkpoint, tbCallBack]

In [10]:
from PIL import ImageFile
# Let PIL load image files that are truncated instead of raising an error,
# so a damaged file does not abort a whole generator pass.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [11]:
# Mini-batch size shared by the data generators and the step computations
batch_size = 16
# Number of target classes (Type_1, Type_2, Type_3)
nb_classes = 3
# Image size requested from the generators, as (height, width)
img_height, img_width = 299, 299
# Number of images in the training folder
nb_train_samples = 1126

In [61]:
# Training-time augmentation: pixel values multiplied by 1/255, plus random
# horizontal/vertical shifts and zoom; new pixels are filled from the nearest ones.
train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        width_shift_range=0.2,
        height_shift_range=0.2,
        zoom_range=0.2,
        fill_mode='nearest')

In [62]:
# Stream augmented training batches from the per-class folders,
# with one-hot ('categorical') labels.
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(img_height, img_width))

Found 1126 images belonging to 3 classes.


In [12]:
# Configuration for evaluation data: no augmentation, only the same 1/255
# rescaling that is applied to the training images.
test_datagen = ImageDataGenerator(rescale=1./255)

In [65]:
# Stream validation batches (rescaled only, no augmentation) from the
# held-out folders, with one-hot ('categorical') labels.
validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        class_mode='categorical',
        batch_size=batch_size,
        target_size=(img_height, img_width))

Found 355 images belonging to 3 classes.


In [None]:
# Train the model; the callbacks write checkpoints and TensorBoard logs.
# Validation steps are derived from the generator instead of a hard-coded 355,
# using ceiling division so that one validation pass covers every validation
# image (floor division left out the final partial batch).
val_steps = (validation_generator.samples + batch_size - 1) // batch_size
history = model.fit_generator(
        train_generator,
        steps_per_epoch=nb_train_samples // batch_size,
        epochs=200,
        validation_data=validation_generator,
        validation_steps=val_steps,
        callbacks=callbacks_list)

Epoch 1/200
Epoch 2/200

In [None]:
# Persist the final model in two parts.
# 1) architecture, as JSON text
model_json = model.to_json()
with open("incep_top_only_dropout_50_2.json", "w") as json_file:
    json_file.write(model_json)

# 2) weights, as HDF5
model.save_weights("incep_top_only_dropout_2.h5")

print("Saved model to disk")

# Let's test it on Kaggle

In [13]:
# Build the test folder path from DATA_HOME_DIR, like the train/valid paths,
# rather than relative to the current working directory (which the %cd cells
# earlier in the notebook change).
test_data_dir = os.path.join(DATA_HOME_DIR, 'test')

In [14]:
# Generator over the unlabeled test images (rescaled only, no augmentation).
# - target_size is given as (height, width), matching the other generators
# - class_mode=None yields image batches only, since there are no labels
# - shuffle=False keeps batches in the order of test_generator.filenames so
#   predictions can be matched back to file names
test_datagen = ImageDataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_directory(test_data_dir,
                                                  target_size=(img_height, img_width),
                                                  batch_size=batch_size,
                                                  class_mode=None,
                                                  shuffle=False)

Found 512 images belonging to 1 classes.


In [15]:
# Rebuild the network from the saved JSON architecture, then load the weights
# of the selected checkpoint. The `with` block closes the file even if reading
# fails, matching how the architecture file is written earlier in the notebook.
with open("good_run/incep_top_only_dropout_50_2.json", 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
model.load_weights("good_run/inception-top-weights-droput-50-2-improvement-36-0.902600862763.hdf5")

In [16]:
# Calculate class posterior probabilities for every test image.
# Restart the generator at its first batch so that re-running this cell keeps
# predictions aligned with test_generator.filenames.
test_generator.reset()
# Number of batches needed to cover the test set exactly once (ceiling
# division), instead of a hard-coded 32.
test_steps = (test_generator.samples + test_generator.batch_size - 1) // test_generator.batch_size
y_probabilities = model.predict_generator(test_generator, test_steps)
print(len(y_probabilities))

512


In [17]:
print(y_probabilities[:5])

[[ 0.12600601  0.6601392   0.21385479]
 [ 0.27472854  0.50746393  0.21780752]
 [ 0.04537117  0.48514774  0.46948111]
 [ 0.03214805  0.42386901  0.54398292]
 [ 0.21812342  0.5947724   0.18710423]]


In [20]:
# Derive the image file names (and extension-less ids) from the generator's
# paths. os.path helpers replace split('/')[1] / split('.')[0], which assumed
# exactly one folder level, a '/' separator and a single dot in the name.
filenames = [os.path.basename(filename) for filename in test_generator.filenames]
ids = [os.path.splitext(filename)[0] for filename in filenames]

In [21]:
results_path = 'results/'
results_name = 'predictions.csv'

# Every probability row must correspond to exactly one file name
assert len(filenames) == len(y_probabilities), "predictions and file names are misaligned"

# Create the output directory on first use instead of failing inside open()
if not os.path.isdir(results_path):
    os.makedirs(results_path)

# Save results as a csv file in the specified results directory.
# Columns 0..2 of y_probabilities are written as Type_1..Type_3
# (assumes the training generator indexed the class folders in that order -- TODO confirm).
with open(os.path.join(results_path, results_name), 'w') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(('image_name', 'Type_1', 'Type_2', 'Type_3'))
    writer.writerows(zip(filenames, y_probabilities[:, 0], y_probabilities[:, 1], y_probabilities[:, 2]))