In [1]:
from __future__ import print_function
import keras
from keras.layers import Dense, Conv2D, BatchNormalization, Activation
from keras.layers import AveragePooling2D, Input, Flatten, Dropout
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model, load_model
from keras.utils import to_categorical
from keras.applications.vgg16 import VGG16, preprocess_input
import numpy as np
import os
import pandas as pd
import os
import json
import cv2

Using TensorFlow backend.


## Import Data

In [2]:
# Directory holding every training image; file names end with "_<patient id>.png".
path = "D:/MaunaKea/TrainingSetImagesDir/"

# List the directory once instead of once per candidate patient id.
training_files = os.listdir(path)

# Map every candidate patient id ("0" .. "60") to the image files of that patient.
# Ids without any image are kept as empty lists at this point.
patients = {
    str(patient_id): [name for name in training_files
                      if name.endswith("_" + str(patient_id) + ".png")]
    for patient_id in range(61)
}

# Persist the mapping as JSON so the dict can be reloaded later.
with open('unsorted_patients.txt', 'w') as json_file:
    json.dump(patients, json_file)

# Label table. The patient id is the part of the file name located after the
# second underscore and before the first dot; it is a string, so the sort on
# 'patient' is lexicographic.
targets = pd.read_csv("train_target.csv")
targets['patient'] = targets['image_filename'].map(
    lambda x: x.partition('_')[-1].partition('_')[-1].partition('.')[0])
targets = targets.sort_values(by = ['patient', 'image_filename']).set_index(keys = ['image_filename'])

In [3]:
# Remove every patient id whose image list is empty.
# The keys are snapshotted first because the dict is modified while looping.
keys = list(patients)
for key in keys:
    if not patients[key]:
        del patients[key]

In [4]:
# Hold out every image of four patients for validation; the remaining rows of
# the label table form the training set. (Patients '1' and '0' were also
# considered for the hold-out but are currently left in training.)
validation_patients = ['10', '8', '7', '6']
validation_files = []
for patient_id in validation_patients:
    validation_files += patients[patient_id]
validation_set = targets.loc[validation_files]
training_set = targets.drop(validation_set.index)

In [5]:
def _load_image_stack(filenames, directory, preprocess):
    """Read, resize and optionally preprocess a list of image files.

    Parameters
    ----------
    filenames : iterable of str
        File names, each concatenated directly to ``directory``.
    directory : str
        Folder prefix (must end with a path separator).
    preprocess : bool
        When true, ``preprocess_input`` is applied to each image once it has
        been stored in the output array.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(filenames), 224, 224, 3)``.

    Raises
    ------
    IOError
        If OpenCV cannot read one of the files.
    """
    filenames = list(filenames)
    stack = np.zeros((len(filenames), 224, 224, 3))
    for position, filename in enumerate(filenames):
        image = cv2.imread(directory + filename)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise IOError("Could not read image: " + directory + filename)
        stack[position] = cv2.resize(image, (224, 224))
        if preprocess:
            stack[position] = preprocess_input(stack[position])
    return stack


small_training_path = "D:/MaunaKea/TrainingSetImagesDir/"
small_validation_path = "D:/MaunaKea/TrainingSetImagesDir/"

# Training images are kept raw here: the augmentation generator used for
# training applies preprocess_input itself.
small_training_images = _load_image_stack(training_set.index, small_training_path, preprocess = False)
# Labels are read as one column instead of one row lookup per image.
small_training_classes = np.asarray(training_set['class_number'], dtype = float)

# Validation images are fed to the model directly, so they are preprocessed now.
small_validation_images = _load_image_stack(validation_set.index, small_validation_path, preprocess = True)
small_validation_classes = np.asarray(validation_set['class_number'], dtype = float)

In [6]:
# One-hot encode the labels. The width is taken from the full label table so
# that both splits get the same number of columns even if one of them happens
# to lack the highest class (to_categorical otherwise infers it per array).
# NOTE: this cell is not idempotent; running it twice would encode the
# already one-hot arrays again.
n_classes = int(targets['class_number'].max()) + 1
small_training_classes = to_categorical(small_training_classes, num_classes = n_classes)
small_validation_classes = to_categorical(small_validation_classes, num_classes = n_classes)

## Fine Tuning Block 5   ~91% val acc

In [8]:
# Starting point of the fine-tuning: a previously saved model, loaded from a
# path relative to the current working directory.
model = load_model("vgg16_augmented_nornal_images_no_denoised_85_val_acc.h5")


In [9]:
# Layers 0-14 (input up to block4_pool) stay frozen; block 5 and the dense
# head (index 15 onwards) are trainable. The flag of every layer is printed
# as a check.
for i, layer in enumerate(model.layers):
    layer.trainable = i >= 15
    print(i, layer.name, layer.trainable)

0 input_4 False
1 block1_conv1 False
2 block1_conv2 False
3 block1_pool False
4 block2_conv1 False
5 block2_conv2 False
6 block2_pool False
7 block3_conv1 False
8 block3_conv2 False
9 block3_conv3 False
10 block3_pool False
11 block4_conv1 False
12 block4_conv2 False
13 block4_conv3 False
14 block4_pool False
15 block5_conv1 True
16 block5_conv2 True
17 block5_conv3 True
18 block5_pool True
19 flatten_4 True
20 dense_10 True
21 batch_normalization_7 True
22 activation_7 True
23 dropout_7 True
24 dense_11 True
25 batch_normalization_8 True
26 activation_8 True
27 dropout_8 True
28 dense_12 True


In [10]:
# Augmentation pipeline for the training images. ImageDataGenerator is already
# imported in the first cell, so it is not imported again here.
# Brightness augmentation is intentionally not used.
datagen = ImageDataGenerator(
    rotation_range = 180,                       # random rotations within +/- 180 degrees
    width_shift_range = 30,                     # integer value: horizontal shift in pixels
    height_shift_range = 30,                    # integer value: vertical shift in pixels
    zoom_range = [1.0, 1.2],                    # zoom factor sampled from [lower, upper]
    preprocessing_function = preprocess_input,  # applied to every generated image
    horizontal_flip = True,
    vertical_flip = True)


In [11]:
# Hyper-parameters for fine-tuning block 5 and the dense head.
lr = 1e-5
batch_size = 32
n_epochs = 15

optimizer = Adam(lr = lr)
# Multiply the learning rate by 0.1 after 5 epochs without improvement of the
# callback's default monitored quantity, never going below min_lr.
lr_reducer = ReduceLROnPlateau(factor=0.1,
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-7)

# Checkpoints are written to ./saved_models, with the epoch number in the name.
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_type = 'VGG16'
model_name = 'finetuned_%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
# Only epochs that improve the validation accuracy are saved.
# NOTE(review): 'val_acc' is the metric key logged by the Keras version used
# here; confirm the key name if Keras is upgraded.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)
callbacks = [lr_reducer, checkpoint]

# Compile after the trainable flags were changed so that they are taken into account.
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])

# Each "epoch" draws half as many batches as a full pass over the training
# images. The value is rounded up to an int because a plain division yields a
# float under Python 3, whereas Keras expects a whole number of steps.
steps_per_epoch = int(np.ceil(small_training_images.shape[0] / float(batch_size * 2)))

history = model.fit_generator(datagen.flow(small_training_images, small_training_classes, batch_size),
                              steps_per_epoch = steps_per_epoch, epochs = n_epochs,
                              validation_data = (small_validation_images, small_validation_classes),
                              callbacks = callbacks)

Epoch 1/15

Epoch 00001: val_acc improved from -inf to 0.85231, saving model to C:\Users\Pierre\saved_models\finetuned_VGG16_model.001.h5
Epoch 2/15

Epoch 00002: val_acc improved from 0.85231 to 0.86564, saving model to C:\Users\Pierre\saved_models\finetuned_VGG16_model.002.h5
Epoch 3/15

Epoch 00003: val_acc did not improve from 0.86564
Epoch 4/15

Epoch 00004: val_acc did not improve from 0.86564
Epoch 5/15

Epoch 00005: val_acc improved from 0.86564 to 0.89641, saving model to C:\Users\Pierre\saved_models\finetuned_VGG16_model.005.h5
Epoch 6/15

Epoch 00006: val_acc did not improve from 0.89641
Epoch 7/15

Epoch 00007: val_acc did not improve from 0.89641
Epoch 8/15

Epoch 00008: val_acc improved from 0.89641 to 0.89846, saving model to C:\Users\Pierre\saved_models\finetuned_VGG16_model.008.h5
Epoch 9/15

Epoch 00009: val_acc improved from 0.89846 to 0.90872, saving model to C:\Users\Pierre\saved_models\finetuned_VGG16_model.009.h5
Epoch 10/15

Epoch 00010: val_acc did not improve 

## Fine Tuning Block 4 ~93% val acc

In [12]:
# Continue from the epoch-9 checkpoint of the block-5 run (val_acc 0.90872 in the log above).
model = load_model("C:/Users/Pierre/saved_models/finetuned_VGG16_model.009.h5")

In [14]:
# Layers 0-10 (input up to block3_pool) stay frozen; blocks 4 and 5 and the
# dense head (index 11 onwards) are trainable. The flag of every layer is
# printed as a check.
for i, layer in enumerate(model.layers):
    layer.trainable = i >= 11
    print(i, layer.name, layer.trainable)

0 input_4 False
1 block1_conv1 False
2 block1_conv2 False
3 block1_pool False
4 block2_conv1 False
5 block2_conv2 False
6 block2_pool False
7 block3_conv1 False
8 block3_conv2 False
9 block3_conv3 False
10 block3_pool False
11 block4_conv1 True
12 block4_conv2 True
13 block4_conv3 True
14 block4_pool True
15 block5_conv1 True
16 block5_conv2 True
17 block5_conv3 True
18 block5_pool True
19 flatten_4 True
20 dense_10 True
21 batch_normalization_7 True
22 activation_7 True
23 dropout_7 True
24 dense_11 True
25 batch_normalization_8 True
26 activation_8 True
27 dropout_8 True
28 dense_12 True


In [15]:
# Augmentation pipeline for the training images. ImageDataGenerator is already
# imported in the first cell, so it is not imported again here.
# Brightness augmentation is intentionally not used.
datagen = ImageDataGenerator(
    rotation_range = 180,                       # random rotations within +/- 180 degrees
    width_shift_range = 30,                     # integer value: horizontal shift in pixels
    height_shift_range = 30,                    # integer value: vertical shift in pixels
    zoom_range = [1.0, 1.2],                    # zoom factor sampled from [lower, upper]
    preprocessing_function = preprocess_input,  # applied to every generated image
    horizontal_flip = True,
    vertical_flip = True)


In [16]:
# Hyper-parameters for fine-tuning blocks 4-5 and the dense head
# (learning rate ten times smaller than in the block-5 run).
lr = 1e-6
batch_size = 32
n_epochs = 30

optimizer = Adam(lr = lr)
# Multiply the learning rate by 0.1 after 5 epochs without improvement of the
# callback's default monitored quantity, never going below min_lr.
lr_reducer = ReduceLROnPlateau(factor=0.1,
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-7)

# Checkpoints are written to ./saved_models, with the epoch number in the name.
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_type = 'VGG16'
model_name = 'finetuned_block4_%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
# Only epochs that improve the validation accuracy are saved.
# NOTE(review): 'val_acc' is the metric key logged by the Keras version used
# here; confirm the key name if Keras is upgraded.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)
callbacks = [lr_reducer, checkpoint]

# Compile after the trainable flags were changed so that they are taken into account.
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])

# Each "epoch" draws half as many batches as a full pass over the training
# images. The value is rounded up to an int because a plain division yields a
# float under Python 3, whereas Keras expects a whole number of steps.
steps_per_epoch = int(np.ceil(small_training_images.shape[0] / float(batch_size * 2)))

history = model.fit_generator(datagen.flow(small_training_images, small_training_classes, batch_size),
                              steps_per_epoch = steps_per_epoch, epochs = n_epochs,
                              validation_data = (small_validation_images, small_validation_classes),
                              callbacks = callbacks)

Epoch 1/30

Epoch 00001: val_acc improved from -inf to 0.91077, saving model to C:\Users\Pierre\saved_models\finetuned_block4_VGG16_model.001.h5
Epoch 2/30

Epoch 00002: val_acc did not improve from 0.91077
Epoch 3/30

Epoch 00003: val_acc improved from 0.91077 to 0.91282, saving model to C:\Users\Pierre\saved_models\finetuned_block4_VGG16_model.003.h5
Epoch 4/30

Epoch 00004: val_acc did not improve from 0.91282
Epoch 5/30

Epoch 00005: val_acc did not improve from 0.91282
Epoch 6/30

Epoch 00006: val_acc did not improve from 0.91282
Epoch 7/30

Epoch 00007: val_acc improved from 0.91282 to 0.92615, saving model to C:\Users\Pierre\saved_models\finetuned_block4_VGG16_model.007.h5
Epoch 8/30

Epoch 00008: val_acc did not improve from 0.92615
Epoch 9/30

Epoch 00009: val_acc did not improve from 0.92615
Epoch 10/30

Epoch 00010: val_acc did not improve from 0.92615
Epoch 11/30

Epoch 00011: val_acc did not improve from 0.92615
Epoch 12/30

Epoch 00012: val_acc did not improve from 0.9261

## Fine Tuning Block 3

In [17]:
# Continue from the epoch-7 checkpoint of the block-4 run (val_acc 0.92615 in the log above).
model = load_model("C:/Users/Pierre/saved_models/finetuned_block4_VGG16_model.007.h5")

In [18]:
# Layers 0-6 (input up to block2_pool) stay frozen; blocks 3 to 5 and the
# dense head (index 7 onwards) are trainable. The flag of every layer is
# printed as a check.
for i, layer in enumerate(model.layers):
    layer.trainable = i >= 7
    print(i, layer.name, layer.trainable)

0 input_4 False
1 block1_conv1 False
2 block1_conv2 False
3 block1_pool False
4 block2_conv1 False
5 block2_conv2 False
6 block2_pool False
7 block3_conv1 True
8 block3_conv2 True
9 block3_conv3 True
10 block3_pool True
11 block4_conv1 True
12 block4_conv2 True
13 block4_conv3 True
14 block4_pool True
15 block5_conv1 True
16 block5_conv2 True
17 block5_conv3 True
18 block5_pool True
19 flatten_4 True
20 dense_10 True
21 batch_normalization_7 True
22 activation_7 True
23 dropout_7 True
24 dense_11 True
25 batch_normalization_8 True
26 activation_8 True
27 dropout_8 True
28 dense_12 True


In [19]:
# Augmentation pipeline for the training images. ImageDataGenerator is already
# imported in the first cell, so it is not imported again here.
# Brightness augmentation is intentionally not used.
datagen = ImageDataGenerator(
    rotation_range = 180,                       # random rotations within +/- 180 degrees
    width_shift_range = 30,                     # integer value: horizontal shift in pixels
    height_shift_range = 30,                    # integer value: vertical shift in pixels
    zoom_range = [1.0, 1.2],                    # zoom factor sampled from [lower, upper]
    preprocessing_function = preprocess_input,  # applied to every generated image
    horizontal_flip = True,
    vertical_flip = True)


In [20]:
# Hyper-parameters for fine-tuning blocks 3-5 and the dense head
# (learning rate ten times smaller than in the block-4 run).
lr = 1e-7
batch_size = 32
n_epochs = 30

optimizer = Adam(lr = lr)
# Multiply the learning rate by 0.1 after 5 epochs without improvement of the
# callback's default monitored quantity, never going below min_lr.
lr_reducer = ReduceLROnPlateau(factor=0.1,
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-7)

# Checkpoints are written to ./saved_models, with the epoch number in the name.
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_type = 'VGG16'
model_name = 'finetuned_block3_%s_model.{epoch:03d}.h5' % model_type
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
# Only epochs that improve the validation accuracy are saved.
# NOTE(review): 'val_acc' is the metric key logged by the Keras version used
# here; confirm the key name if Keras is upgraded.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)
callbacks = [lr_reducer, checkpoint]

# Compile after the trainable flags were changed so that they are taken into account.
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])

# Each "epoch" draws half as many batches as a full pass over the training
# images. The value is rounded up to an int because a plain division yields a
# float under Python 3, whereas Keras expects a whole number of steps.
steps_per_epoch = int(np.ceil(small_training_images.shape[0] / float(batch_size * 2)))

history = model.fit_generator(datagen.flow(small_training_images, small_training_classes, batch_size),
                              steps_per_epoch = steps_per_epoch, epochs = n_epochs,
                              validation_data = (small_validation_images, small_validation_classes),
                              callbacks = callbacks)

Epoch 1/30

Epoch 00001: val_acc improved from -inf to 0.90462, saving model to C:\Users\Pierre\saved_models\finetuned_block3_VGG16_model.001.h5
Epoch 2/30

Epoch 00002: val_acc did not improve from 0.90462
Epoch 3/30

Epoch 00003: val_acc did not improve from 0.90462
Epoch 4/30

Epoch 00004: val_acc did not improve from 0.90462
Epoch 5/30

Epoch 00005: val_acc did not improve from 0.90462
Epoch 6/30

Epoch 00006: val_acc improved from 0.90462 to 0.91282, saving model to C:\Users\Pierre\saved_models\finetuned_block3_VGG16_model.006.h5
Epoch 7/30

Epoch 00007: val_acc did not improve from 0.91282
Epoch 8/30

Epoch 00008: val_acc did not improve from 0.91282
Epoch 9/30

Epoch 00009: val_acc did not improve from 0.91282
Epoch 10/30

Epoch 00010: val_acc did not improve from 0.91282
Epoch 11/30

Epoch 00011: val_acc did not improve from 0.91282
Epoch 12/30

Epoch 00012: val_acc did not improve from 0.91282
Epoch 13/30

Epoch 00013: val_acc did not improve from 0.91282
Epoch 14/30

Epoch 00

## Model evaluation

In [5]:
# Model under evaluation: the epoch-7 checkpoint of the block-4 run, i.e. the
# highest val_acc (0.92615) in the training logs of this notebook.
model = load_model("C:/Users/Pierre/saved_models/finetuned_block4_VGG16_model.007.h5")

In [22]:
# preprocess_input writes its result over floating-point inputs, so it is given
# a copy: the raw pixels in small_training_images stay intact and this cell can
# be re-run without preprocessing the data twice.
# NOTE: the copy temporarily doubles the memory used by the training images.
preprocessed_train = preprocess_input(small_training_images.copy())

In [23]:

# Model outputs for every (preprocessed, non-augmented) training image.
train_pred = model.predict(preprocessed_train)

In [24]:
from sklearn.metrics import accuracy_score, confusion_matrix


In [25]:
# Training accuracy: argmax turns the one-hot labels and the model outputs back into class indices.
accuracy_score(small_training_classes.argmax(axis = 1), train_pred.argmax(axis = 1))

0.9671821508676661

In [26]:
# The validation images were already passed through preprocess_input when they
# were loaded, so they are fed to the model as is.
val_pred = model.predict(small_validation_images)

In [27]:
# Validation accuracy on the held-out patients (class indices recovered with argmax).
accuracy_score(small_validation_classes.argmax(axis = 1), val_pred.argmax(axis = 1))

0.9261538461538461

In [28]:
# Training confusion matrix: true classes are passed first (rows), predicted classes second (columns).
confusion_matrix(small_training_classes.argmax(axis = 1), train_pred.argmax(axis = 1))

array([[ 813,    3,    2,    7],
       [   8, 2956,   16,   22],
       [  19,   18, 1081,   28],
       [  53,   65,   37, 3343]], dtype=int64)

In [29]:
# Validation confusion matrix: true classes are passed first (rows), predicted classes second (columns).
confusion_matrix(small_validation_classes.argmax(axis = 1), val_pred.argmax(axis = 1))

array([[602,   0,   5,  37],
       [  1, 149,  23,   2],
       [  0,   0,  58,   2],
       [  0,   2,   0,  94]], dtype=int64)

## Image per Image Submission

In [6]:
# Submission table; its 'image_name' column lists the test images to predict.
submission = pd.read_csv("D:/MaunaKea/submission.csv")

In [7]:
# Load every test image listed in the submission table, resized to 224x224.
test_path = "D:/MaunaKea/TestSetImagesDir/"
test_images = np.zeros((submission['image_name'].shape[0], 224, 224, 3))

for i, image_path in enumerate(submission['image_name']):
    image = cv2.imread(test_path + image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("Could not read image: " + test_path + image_path)
    # The resized image already has the shape of one slot, so no reshape is needed.
    test_images[i] = cv2.resize(image, (224, 224))

# Same preprocessing as the one applied to the training and validation images.
test_images = preprocess_input(test_images)

In [32]:
# Image-wise prediction: index of the highest model output for each test image.
submission['predictions'] = model.predict(test_images).argmax(axis = 1)

In [33]:
# Number of test images assigned to each predicted class.
submission['predictions'].value_counts()

3    577
1    453
2    366
0    319
Name: predictions, dtype: int64

In [34]:
# Save the image-wise predictions (the DataFrame index is not written).
submission.to_csv('D:/MaunaKea/imperim_finetuned_vgg.csv', index = False)

## Patient-wise submission

In [8]:
# List the test directory once instead of once per candidate patient id.
test_files = os.listdir(test_path)

# Map every candidate patient id ("0" .. "60") to the test image files of that
# patient (file names end with "_<patient id>.png"). Ids without any image are
# kept as empty lists at this point.
patients = {
    str(patient_id): [name for name in test_files
                      if name.endswith("_" + str(patient_id) + ".png")]
    for patient_id in range(61)
}


In [9]:
# Remove every patient id whose image list is empty.
# The keys are snapshotted first because the dict is modified while looping.
keys = list(patients)
for key in keys:
    if not patients[key]:
        del patients[key]

In [10]:
# Patient ids that have at least one image in the test directory.
patients.keys()

dict_keys(['9', '16', '20', '21', '26', '27', '28', '33', '37', '38', '39', '52', '56', '57', '58', '59', '60'])

In [11]:
# For every test patient, predict the class of each of their images.
# patients_pred maps a patient id to the array of per-image class indices.
patients_pred = {}
for patient, filenames in patients.items():
    patient_images = np.zeros((len(filenames), 224, 224, 3))
    for i, image_path in enumerate(filenames):
        image = cv2.imread(test_path + image_path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise IOError("Could not read image: " + test_path + image_path)
        image = cv2.resize(image, (224, 224))
        # The preprocessed image already has the shape of one slot, so no reshape is needed.
        patient_images[i] = preprocess_input(image)
    patients_pred[patient] = model.predict(patient_images).argmax(axis = 1)
    # Progress indicator: one line per processed patient.
    print(patient)



9
16
20
21
26
27
28
33
37
38
39
52
56
57
58
59
60


In [14]:
# Majority vote: every image of a patient receives the class predicted most
# often among that patient's images (np.bincount(...).argmax() picks the
# lowest class index on ties). The 'predictions' column of the submission
# table is overwritten in place.
patient_class = {}
for patient, image_predictions in patients_pred.items():
    predicted_class = np.bincount(image_predictions).argmax()
    # One mask per patient instead of one full-column comparison per image.
    is_patient_image = submission['image_name'].isin(patients[patient])
    submission.loc[is_patient_image, 'predictions'] = predicted_class
    patient_class[patient] = predicted_class

In [168]:
# Save the patient-wise (majority vote) predictions (the DataFrame index is not written).
submission.to_csv('D:/MaunaKea/finetuned_vgg_patient_wise.csv', index = False)

In [15]:
# Class assigned to each test patient by the majority vote.
patient_class

{'9': 0,
 '16': 0,
 '20': 3,
 '21': 2,
 '26': 3,
 '27': 1,
 '28': 3,
 '33': 0,
 '37': 1,
 '38': 3,
 '39': 2,
 '52': 2,
 '56': 1,
 '57': 0,
 '58': 2,
 '59': 3,
 '60': 1}