# Training of a fully convolutional neural network with transfer learning
Part of the code comes from https://github.com/fastai/courses/blob/master/deeplearning1/nbs/lesson7.ipynb where different architectures are tried for the Kaggle fisheries competition

In [None]:
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
import scipy
from skimage.transform import resize
import matplotlib.pyplot as plt  
%matplotlib inline  
from keras import backend as K
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils.np_utils import to_categorical

In [None]:
#VGG-16 model with batch normalization
from vgg16bn import Vgg16BN

### VGG-16 Description

In [None]:
vgg16Full = Vgg16BN().model

In [None]:
vgg16Full.summary()

In [None]:
#VGG-16 model without the top layers (include_top = False)
vgg16NoTop = Vgg16BN(include_top = False).model

In [None]:
vgg16NoTop.summary()

## Functions needed for the training 

In [None]:
def get_batches(dirname, gen=None, shuffle=True, batch_size=4, class_mode='categorical',
                target_size=(224,224)):
    """Return a directory iterator yielding batches of images read from `dirname`.

    Keras infers the classes from the sub-directory names under `dirname`:
    each sub-directory is one class, and the label indices follow the
    alphanumeric order of those names.

    dirname     -- directory holding one sub-directory per class
    gen         -- generator object providing flow_from_directory; when None
                   (the default) a fresh ImageDataGenerator() is created for
                   this call instead of sharing one instance built at
                   definition time
    shuffle, batch_size, target_size -- forwarded to flow_from_directory
    class_mode  -- 'categorical' yields 2D one-hot encoded labels
    """
    if gen is None:
        gen = ImageDataGenerator()
    return gen.flow_from_directory(dirname, target_size=target_size,
            class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)

In [None]:
#Load every image found under path into one array, in directory order
def get_data(path, target_size=(224,224)):
    """Return all images under `path` concatenated along the first axis.

    The iterator is created with shuffle=False, batch_size=1 and
    class_mode=None, and is advanced `batches.samples` times, so the
    result keeps the iterator's order with one entry per step.
    """
    batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
    single_image_batches = []
    for _ in range(batches.samples):
        single_image_batches.append(batches.next())
    return np.concatenate(single_image_batches)

In [None]:
#One-hot encode a vector of integer class indices
def onehot(x):
    """Return the binary class matrix built by Keras' to_categorical for `x`."""
    encoded = to_categorical(x)
    return encoded

In [None]:
def to_plot(img):
    """Return `img` as a uint8 array with its axes arranged for plt.imshow.

    With the 'tf' dimension ordering, axis 0 is rolled to position 1, which
    leaves the axis order unchanged. With any other ordering, axis 0 is
    rolled to the end (position 3).
    """
    destination = 1 if K.image_dim_ordering() == 'tf' else 3
    return np.rollaxis(img, 0, destination).astype(np.uint8)

def plot(img):
    """Display `img` with matplotlib after converting it with to_plot."""
    displayable = to_plot(img)
    plt.imshow(displayable)


In [None]:
#Collect class indices, one-hot labels and file names of the train and validation sets
def get_classes(path):
    """Return (val classes, train classes, val one-hot, train one-hot, val filenames, train filenames).

    The sets are read from path+'train' and path+'validation' with
    shuffle=False, so `path` must end with a path separator.
    """
    trn_batches = get_batches(path+'train', shuffle=False, batch_size=1)
    val_batches = get_batches(path+'validation', shuffle=False, batch_size=1)
    val_cls = val_batches.classes
    trn_cls = trn_batches.classes
    return (val_cls, trn_cls, onehot(val_cls), onehot(trn_cls),
        val_batches.filenames, trn_batches.filenames)

In [None]:
#Cut the model's layer list right after the last layer of the requested type (the last
#convolutional layer in this notebook); the output shape of the first part is then used
#as the input shape of the new network
def split_at(model, layer_type):
    """Return (layers up to and including the last `layer_type` layer, the remaining layers).

    Only layers whose exact type is `layer_type` are considered; instances of
    subclasses do not match. Raises IndexError when no layer matches.
    """
    stack = model.layers
    cut = None
    for position, candidate in enumerate(stack):
        if type(candidate) is layer_type:
            #Keep overwriting so that the last match wins
            cut = position + 1
    if cut is None:
        raise IndexError('list index out of range')
    return stack[:cut], stack[cut:]

In [None]:
#path for train and validation data
path='./'

In [None]:
#Class indices, one-hot labels and file names; the validation set comes first in each pair
(val_classes, trn_classes, val_labels, trn_labels, val_filenames, filenames) = get_classes(path)

In [None]:
#Load all training and validation images into memory as arrays (default target size 224x224)
trn = get_data(path+'train')
val = get_data(path+'validation')

In [None]:
plot(trn[0])

# Construction of vgg16 model without activation part

In [None]:
vgg16 = Vgg16BN(include_top = False).model

In [None]:
#Remove last layer which is a maxPooling layer
vgg16.pop()
#Not the last statement of the cell, so the notebook does not display this tuple
#(the same expression is displayed by a later cell)
vgg16.input_shape, vgg16.output_shape
#NOTE(review): this model is only used through predict() below; presumably it is compiled
#because this Keras version expects it before predicting - TODO confirm
vgg16.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])

In [None]:
vgg16.summary()

In [None]:
#Input = shape of the image (224x224 with 3 RGB layers)
#Output = size of feature shape from the sequence of convolutional layers
vgg16.input_shape, vgg16.output_shape

### Predict features for training and validation images

In [None]:
#Output of the truncated VGG-16 for every validation image, computed 20 images at a time
conv_val_feat = vgg16.predict(val, batch_size=20, verbose=1)

In [None]:
#Output of the truncated VGG-16 for every training image, computed 20 images at a time
conv_trn_feat = vgg16.predict(trn, batch_size=20, verbose=1)

In [None]:
conv_val_feat.shape, conv_trn_feat.shape

### Save features or import them

In [None]:
import pickle as pkl

The file names used for saving and for loading below in this notebook are deliberately different (they should normally be the same) so that previously computed features are not overwritten.  

In [None]:
#Save the validation features; the with-block closes the file even if pickling fails
with open('conv_val_feat.pkl','wb') as output:
    pkl.dump(conv_val_feat, output)

In [None]:
#Save the training features; the with-block closes the file even if pickling fails
with open('conv_trn_feat.pkl','wb') as output:
    pkl.dump(conv_trn_feat, output)

In [None]:
#Load previously saved features. The files are opened in with-blocks so that their handles
#are closed right after reading.
#NOTE: pickle can execute arbitrary code while loading - only load files you created yourself
with open("conv_val.pkl", "rb") as feat_file:
    conv_val_feat = pkl.load(feat_file)
with open("conv_trn.pkl", "rb") as feat_file:
    conv_trn_feat = pkl.load(feat_file)
conv_val_feat.shape, conv_trn_feat.shape

## Create a fully convolutional network taking as input the above features 
## With global average pooling as final layer
This solution was not retained, as without the global average pooling we seem to have better visual results (see below)

In [None]:
from keras.layers.convolutional import *
from keras.models import Sequential, Model
from keras.layers.normalization import BatchNormalization
from keras.layers import GlobalAveragePooling2D, Activation, Dropout
from keras import backend as K
from keras.optimizers import SGD, RMSprop, Adam

In [None]:
#Layers of vgg16 up to and including its last Convolution2D layer
conv_layers,_ = split_at(vgg16, Convolution2D)
#Output shape of that layer without the batch dimension; used as input_shape of the new model
conv_layers[-1].output_shape[1:]

#### Construct the new model

In [None]:
#nf: number of filters of each hidden convolution; p: dropout probability (0 turns dropout off)
nf = 128
p = 0

In [None]:
#Layers of the new network, which takes as input the features computed at the last
#convolutional layer of vgg16
def get_lrg_layers():
    """Return the list of layers of the fully convolutional classifier.

    Reads the notebook-level variables conv_layers (input shape), nf (filters
    per hidden convolution) and p (dropout probability). The stack is: batch
    normalisation of the input, three blocks of 3x3 convolution + batch
    normalisation + max pooling, a 2-filter 3x3 convolution, dropout, global
    average pooling and a softmax.
    """
    feature_shape = conv_layers[-1].output_shape[1:]
    stack = [BatchNormalization(axis=1, input_shape=feature_shape)]
    #Positional arguments of each block's MaxPooling2D: the first two blocks use the
    #layer's default pool size, the third one pools with (1,2)
    for pool_args in ((), (), ((1,2),)):
        stack.append(Convolution2D(nf,3,3, activation='relu', border_mode='same'))
        stack.append(BatchNormalization(axis=1))
        stack.append(MaxPooling2D(*pool_args))
    #Two output maps, one per class, averaged over space before the softmax
    stack.append(Convolution2D(2,3,3, border_mode='same'))
    stack.append(Dropout(p))
    stack.append(GlobalAveragePooling2D())
    stack.append(Activation('softmax'))
    return stack

In [None]:
lrg_model = Sequential(get_lrg_layers())

In [None]:
lrg_model.summary()

In [None]:
#Adam with a learning rate of 1e-5
#NOTE(review): the labels are one-hot and the model ends in a softmax; the second model further
#down is compiled with 'categorical_crossentropy' - confirm 'binary_crossentropy' is intended here
lrg_model.compile(Adam(lr=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

#### Training of the fully convolutional network

In [None]:
#batch_size is a notebook-level variable: the later fit calls of the second model reuse it
batch_size=64

#Train on the precomputed convolutional features for 6 epochs, validating on the validation features
history_lrg = lrg_model.fit(conv_trn_feat, trn_labels, batch_size=batch_size, nb_epoch=6, 
             validation_data=(conv_val_feat, val_labels))

### Show heatmap of last convolutional layer (better below)
Grabs the output of last convolutional layer (which is the 4th-last layer of our model).

In [None]:
#conv_fn maps [model input, learning-phase flag] to the output of l[-4]. Counting back from the
#end of get_lrg_layers (Activation, GlobalAveragePooling2D, Dropout), l[-4] is the final
#2-filter Convolution2D layer
l = lrg_model.layers
conv_fn = K.function([l[0].input, K.learning_phase()], [l[-4].output])

In [None]:
#Modified variant: selects the map along the last axis of the activation
#(get_cm2 further down selects along the first axis)
def get_cm(inp, label):
    '''Return one activation map of the last convolutional layer, resized to 224x224.

    conv_fn is evaluated on inp with the learning-phase flag set to 0, and
    the activation of the first sample is taken. Its third axis is moved
    to the front and indexed with label.

    NOTE(review): this is only the class map if the activation is laid out
    with the class channel last - TODO confirm against the backend ordering.
    NOTE(review): scipy.misc.imresize was removed in SciPy 1.3, so an older
    SciPy is required.
    '''
    sample_maps = conv_fn([inp,0])[0][0]
    chosen = np.rollaxis(sample_maps,2,0)[label]
    return scipy.misc.imresize(chosen, (224,224))

In [None]:
#We have to add an extra dimension to our input since the CNN expects a 'batch' (even if it's just a batch of one).
inp = np.expand_dims(conv_val_feat[330],0)
inp.shape

In [None]:
#Shape of the last convolutional layer's output for this single-image batch.
#(`conv` only exists as a local variable inside get_cm, so `conv.shape` raised a NameError here)
conv_fn([inp,0])[0].shape

In [None]:
np.round(lrg_model.predict(inp)[0],2)

In [None]:
plt.imshow(to_plot(val[330]))

## Fully convolutional neural network heatmap
## Without global average pooling layer

In [None]:
vgg16 = Vgg16BN(include_top = False).model

In [None]:
#Drop the last layer of the freshly built model again
vgg16.pop()
#Not the last statement of the cell, so the notebook does not display this tuple
vgg16.input_shape, vgg16.output_shape
#NOTE(review): from here on vgg16 is only passed to split_at; compiling may be unnecessary - TODO confirm
vgg16.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])

In [None]:
conv_layers,_ = split_at(vgg16, Convolution2D)

#### Construct the new model

In [None]:
#Number of filters of each hidden convolution
nf = 128

In [None]:
def get_lrg_layers():
    """Return the list of layers of the second fully convolutional classifier.

    Reads the notebook-level variables conv_layers (input shape) and nf
    (filters per hidden convolution). The stack is: batch normalisation of
    the input, three blocks of 3x3 convolution + batch normalisation, a
    2-filter 3x3 convolution, global average pooling and a softmax. Unlike
    the first get_lrg_layers in this notebook, it has no MaxPooling2D or
    Dropout layers, so the spatial size of the feature maps is kept up to
    the last convolution.
    """
    feature_shape = conv_layers[-1].output_shape[1:]
    stack = [BatchNormalization(axis=1, input_shape=feature_shape)]
    for _ in range(3):
        stack.append(Convolution2D(nf,3,3, activation='relu', border_mode='same'))
        stack.append(BatchNormalization(axis=1))
    #Two output maps, one per class, averaged over space before the softmax
    stack.append(Convolution2D(2,3,3, border_mode='same'))
    stack.append(GlobalAveragePooling2D())
    stack.append(Activation('softmax'))
    return stack

In [None]:
lrg_model = Sequential(get_lrg_layers())

lrg_model.summary()

In [None]:
lrg_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

### Training is done in two parts

In [None]:
lrg_model.fit(conv_trn_feat, trn_labels, batch_size=batch_size, nb_epoch=2, 
             validation_data=(conv_val_feat, val_labels))

In [None]:
#Lower the learning rate for the second training phase.
#K.set_value writes the new value into the optimizer's existing backend variable. A plain
#`lrg_model.optimizer.lr=1e-5` only rebinds the attribute to a Python float, so the training
#function built during the first fit() would keep using the old rate.
K.set_value(lrg_model.optimizer.lr, 1e-5)

In [None]:
hist_lrg2 = lrg_model.fit(conv_trn_feat, trn_labels, batch_size=batch_size, nb_epoch=6, 
             validation_data=(conv_val_feat, val_labels))

### Save the learned weights
Again, the file names below are deliberately different so that previously computed weights are not overwritten

In [None]:
lrg_model.save_weights('my_model_weights.h5_not')

In [None]:
lrg_model.load_weights('my_model_weights.h5')

In [None]:
#conv_fn maps [model input, learning-phase flag] to the output of l[-3]. Counting back from the
#end of get_lrg_layers (Activation, GlobalAveragePooling2D), l[-3] is the final 2-filter
#Convolution2D layer
l = lrg_model.layers
conv_fn = K.function([l[0].input, K.learning_phase()], [l[-3].output])

### Show heatmap of last convolutional layer

In [None]:
#Use this one !
def get_cm2(inp, label):
    '''Return the activation map at index label of the last convolutional layer, resized to 224x224.

    conv_fn is evaluated on inp with the learning-phase flag set to 0, and
    the activation of the first sample is taken, then indexed along its
    first axis with label.

    NOTE(review): scipy.misc.imresize was removed in SciPy 1.3, so an older
    SciPy is required.
    '''
    sample_maps = conv_fn([inp,0])[0][0]
    #rollaxis with its default start of 0 leaves the axis order as it is
    class_map = np.rollaxis(sample_maps,0)[label]
    return scipy.misc.imresize(class_map, (224,224))

In [None]:
inp = np.expand_dims(conv_val_feat[330], 0)
inp.shape

In [None]:
plt.imshow(to_plot(val[330]))

In [None]:
cm = get_cm2(inp, 1)

In [None]:
plt.imshow(cm, cmap="cool")

In [None]:
plt.figure(figsize=(5,5))
plot(val[330])
plt.imshow(cm, cmap="cool", alpha=0.5)

### Use a dataframe to store and display the images as well as their respective activation heatmap

In [None]:
import pandas as pd

In [None]:
df = pd.DataFrame(columns=["image", "pred","input"])

In [None]:
#Store every validation image together with its predicted probability for class index 1
#(rounded to 2 decimals) and its features reshaped to a batch of one, the input format
#expected by get_cm2
n_val = val.shape[0]
val_feats = conv_val_feat[:n_val]
#One batched predict call instead of one call per image
class1_probs = np.round(lrg_model.predict(val_feats), 2)[:, 1]
imageList = list(val)
predList = list(class1_probs)
inptList = [np.expand_dims(feat, 0) for feat in val_feats]


In [None]:
df["image"] = imageList
df["pred"] = predList
df["input"] = inptList
df.shape

In [None]:
#Sort the rows by decreasing "pred", i.e. the predicted probability of class index 1
df2 = df.sort_values(by = "pred", ascending = False)

### Save positively labeled images with their corresponding activation layer

In [None]:
positive = df2['pred'] >= 0.5
filteredDf = df2[positive]

####  without activation

In [None]:
#Write each positively predicted image to ./verification/image<i>.png
#(the ./verification directory must already exist)
for i, image in enumerate(filteredDf["image"]):
    fig = plt.figure()
    #1 x 1 inch saved at dpi=224 gives a 224 x 224 pixel file
    fig.set_size_inches(1, 1, forward=False)
    #A single axes covering the whole figure, without ticks or frame
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(to_plot(image))
    fig.savefig('./verification/image'+str(i)+'.png',dpi=224)
    #Close this figure explicitly so that figures do not accumulate over the loop
    plt.close(fig)
    

#### with activation

In [None]:
#Write each image with its class-1 activation map overlaid to ./verificationWA/image<i>.png
#(the ./verificationWA directory must already exist)
#NOTE(review): this iterates over df2 (all validation images), whereas the cell without
#activation uses filteredDf (pred >= 0.5) - confirm which set is intended
for i, (_, row) in enumerate(df2.iterrows()):
    cm2 = get_cm2(row['input'],1)
    fig = plt.figure()
    #1 x 1 inch saved at dpi=224 gives a 224 x 224 pixel file
    fig.set_size_inches(1, 1, forward=False)
    #A single axes covering the whole figure, without ticks or frame
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(to_plot(row['image']))
    #Semi-transparent heatmap drawn on top of the image
    ax.imshow(cm2, cmap="cool", alpha=0.5)
    fig.savefig('./verificationWA/image'+str(i)+'.png',dpi=224)
    #Close this figure explicitly so that figures do not accumulate over the loop
    plt.close(fig)

### Poster creation

In [None]:
#Poster of all validation images, sorted by decreasing prediction, on a 15 x 24 grid
scale = 3
inches_A2 = (23.39 * scale, 16.53 * scale)
fig, ax = plt.subplots(15,24,figsize=inches_A2)
axs = ax.ravel()
#Hide ticks and frame of every grid cell up front, so cells left without an image stay blank
for cell in axs:
    cell.axis('off')
#The grid has 15 * 24 = 360 cells; more images than that raise an IndexError
for i, image in enumerate(df2["image"]):
    axs[i].imshow(to_plot(image))

fig.savefig('poster_validation.png', bbox_inches='tight')


In [None]:
#Poster of all validation images with their class-1 activation map overlaid,
#sorted by decreasing prediction, on a 15 x 24 grid
scale = 3
inches_A2 = (23.39 * scale, 16.53 * scale)
fig, ax = plt.subplots(15,24,figsize=inches_A2)
axs = ax.ravel()
#Hide ticks and frame of every grid cell up front, so cells left without an image stay blank
for cell in axs:
    cell.axis('off')
#The grid has 15 * 24 = 360 cells; more images than that raise an IndexError
for i, (_, row) in enumerate(df2.iterrows()):
    cm2 = get_cm2(row['input'],1)
    axs[i].imshow(to_plot(row['image']))
    #Semi-transparent heatmap drawn on top of the image
    axs[i].imshow(cm2, cmap="cool", alpha=0.5)

fig.savefig('poster5.png', bbox_inches='tight')


## Plot confusion matrix

In [None]:
predVal = lrg_model.predict_classes(conv_val_feat)
predVal.shape

In [None]:
import itertools
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline  

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print the confusion matrix `cm` and draw it as an annotated image.

    cm        -- 2D array of counts; with normalize=False its entries are
                 formatted with 'd', so they must be integers
    classes   -- tick labels, used for both axes
    normalize -- when True, each row is divided by its sum and the cells
                 are shown with two decimals
    title     -- title of the plot
    cmap      -- colormap passed to imshow
    """
    if not normalize:
        print('Confusion matrix, without normalization')
        cell_format = 'd'
    else:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
        cell_format = '.2f'

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    #Cells above half of the maximum get white text, the others black text
    midpoint = cm.max() / 2.
    for row, col in np.ndindex(cm.shape[0], cm.shape[1]):
        value = cm[row, col]
        text_color = "white" if value > midpoint else "black"
        plt.text(col, row, format(value, cell_format),
                 horizontalalignment="center",
                 color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
#Second column of the one-hot validation labels: non-zero where the image belongs to class index 1
validation_labels_bin = [x[1] for x in val_labels]

In [None]:
conf_mat = confusion_matrix(validation_labels_bin, predVal )

In [None]:
class_names = ["0","1"]
plot_confusion_matrix(conf_mat, class_names, title='Confusion matrix, without normalization')

In [None]:
plot_confusion_matrix(conf_mat, class_names, normalize=True, title='Normalized confusion matrix')