# Setup

In [1]:
# import packages
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import sklearn
import sys
import matplotlib
from tensorflow import keras
import datetime
import matplotlib.pyplot as plt

In [2]:
# import support libraries
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, \
                                                    img_to_array, load_img
                                                    
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, \
                                        BatchNormalization, GlobalAveragePooling2D

from tensorflow.keras import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.applications import ResNet50, InceptionV3, VGG16, Xception

In [3]:
print('Version check:')
print('Python: {}'.format(sys.version))
print('pandas: {}'.format(pd.__version__))
print('NumPy: {}'.format(np.__version__))
print('sklearn: {}'.format(sklearn.__version__))
print('matplotlib: {}'.format(matplotlib.__version__))
print('TensorFlow: {}'.format(tf.__version__))
print('Keras: {}'.format(keras.__version__))

Version check:
Python: 3.8.2 (default, Apr 27 2020, 15:53:34) 
[GCC 9.3.0]
pandas: 1.0.3
NumPy: 1.18.3
sklearn: 0.22.2.post1
matplotlib: 3.2.1
TensorFlow: 2.2.0
Keras: 2.3.0-tf


In [4]:
# Backend Settings

# clear Keras session
keras.backend.clear_session()

# set seeds
np.random.seed(18)
tf.random.set_seed(18)

print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('XLA_GPU')))
#tf.debugging.set_log_device_placement(True)

Num GPUs Available:  1


# Global functions

In [5]:
class TrainRuntimeCallback(keras.callbacks.Callback):

  def on_train_begin(self,logs={}):
    self.start = datetime.datetime.now()

  def on_train_end(self,logs={}):
    self.process_time = (datetime.datetime.now() - self.start).total_seconds()

class TestRuntimeCallback(keras.callbacks.Callback):

  def on_test_begin(self,logs={}):
    self.start = datetime.datetime.now()

  def on_test_end(self,logs={}):
    self.process_time = (datetime.datetime.now() - self.start).total_seconds()

In [6]:
def train_model(model, optimizer, train_input, val_input, model_name):
    data = dict()

    # Compile model
    model.compile(optimizer = optimizer,
                    loss = 'categorical_crossentropy',
                    metrics = ['accuracy'])

    # Create a callback to record training time
    train_rt = TrainRuntimeCallback()

    # Model fitting parameters
    history = model.fit(
        train_input,
        steps_per_epoch = len(sm_train_df) // batch_size,
        epochs = 20,
        callbacks = [train_rt],
        validation_data=val_input,
        validation_steps = (len(sm_val_df) // batch_size)
    )

    train_time = train_rt.process_time
    #print(train_time)

    history_dict = history.history

    data['model'] = model_name
    data['train_time'] = train_time
    data['train_loss'] = history_dict['loss'][-1]
    data['train_acc'] = history_dict['accuracy'][-1]
    data['val_loss'] = history_dict['val_loss'][-1]
    data['val_acc'] = history_dict['val_accuracy'][-1]

    return data, history_dict

In [7]:
def test_model(model, test_input):
    
    data = dict()

    # Create test callback
    test_rt = TestRuntimeCallback()

    test_loss, test_acc = model.evaluate(
        test_input,
        steps = len(sm_test_df) // batch_size,
        callbacks = [test_rt]
    )
    test_time = test_rt.process_time
    data['test_time'] = test_time
    data['test_loss'] = test_loss
    data['test_acc'] = test_acc

    return data

In [8]:
def round_val(val):
    return round(val, 3)

In [9]:
def save_model_data(train_data, test_data):
    data = dict()

    data['model'] = train_data['model']
    data['train_loss'] = round_val(train_data['train_loss'])
    data['train_acc'] = round_val(train_data['train_acc'])
    data['train_time'] = round_val(train_data['train_time'])
    data['val_loss'] = round_val(train_data['val_loss'])
    data['val_acc'] = round_val(train_data['val_acc'])
    data['test_loss'] = round_val(test_data['test_loss'])
    data['test_acc'] = round_val(test_data['test_acc'])
    data['test_time'] = round_val(test_data['test_time'])

    return data

# Load and import data

In [10]:
train_dir = 'data_files/train/'
test_dir = 'data_files/test/'

train_df = pd.read_csv('data_files/train.csv')
test_df = pd.read_csv('data_files/test.csv')

train_df = train_df.sort_values('filename')
test_df = test_df.sort_values('filename')

In [11]:
train_df.head()

Unnamed: 0.1,Unnamed: 0,id_code,experiment,plate,well,sirna,filename
0,0,HEPG2-01_1_B03,HEPG2-01,1,B03,513,HEPG2-01_1_B03_s1.jpeg
36515,36515,HEPG2-01_1_B03,HEPG2-01,1,B03,513,HEPG2-01_1_B03_s2.jpeg
1,1,HEPG2-01_1_B04,HEPG2-01,1,B04,840,HEPG2-01_1_B04_s1.jpeg
36516,36516,HEPG2-01_1_B04,HEPG2-01,1,B04,840,HEPG2-01_1_B04_s2.jpeg
2,2,HEPG2-01_1_B05,HEPG2-01,1,B05,1020,HEPG2-01_1_B05_s1.jpeg


In [12]:
test_df.head()

Unnamed: 0.1,Unnamed: 0,well_id,experiment,plate,well,filename,sirna_id
0,0,HEPG2-08_1_B03,HEPG2-08,1,B03,HEPG2-08_1_B03_s1.jpeg,855
19897,39794,HEPG2-08_1_B03,HEPG2-08,1,B03,HEPG2-08_1_B03_s2.jpeg,855
1,2,HEPG2-08_1_B04,HEPG2-08,1,B04,HEPG2-08_1_B04_s1.jpeg,710
19898,39796,HEPG2-08_1_B04,HEPG2-08,1,B04,HEPG2-08_1_B04_s2.jpeg,710
2,4,HEPG2-08_1_B05,HEPG2-08,1,B05,HEPG2-08_1_B05_s1.jpeg,836


# Preprocessing

## Select only HUVEC cells

In [13]:
sirna_subset = list(train_df[train_df.sirna <= 19].sirna.unique())
print(sirna_subset)
print(len(sirna_subset))

[6, 10, 8, 5, 3, 1, 2, 19, 11, 7, 4, 13, 14, 17, 18, 16, 0, 9, 15, 12]
20


In [14]:
sm_train_df = train_df[train_df.sirna.isin(sirna_subset)].copy()
sm_test_df = test_df[test_df.sirna_id.isin(sirna_subset)].copy()

In [15]:
# Split cell types to only HUVEC

sm_train_df = sm_train_df[sm_train_df.experiment.str.contains('HUVEC')]
sm_test_df = sm_test_df[sm_test_df.experiment.str.contains('HUVEC')]

In [16]:
sm_train_df.head(n=10)

Unnamed: 0.1,Unnamed: 0,id_code,experiment,plate,well,sirna,filename
7796,7796,HUVEC-01_1_D11,HUVEC-01,1,D11,1,HUVEC-01_1_D11_s1.jpeg
44311,44311,HUVEC-01_1_D11,HUVEC-01,1,D11,1,HUVEC-01_1_D11_s2.jpeg
7805,7805,HUVEC-01_1_D20,HUVEC-01,1,D20,3,HUVEC-01_1_D20_s1.jpeg
44320,44320,HUVEC-01_1_D20,HUVEC-01,1,D20,3,HUVEC-01_1_D20_s2.jpeg
7836,7836,HUVEC-01_1_F09,HUVEC-01,1,F09,5,HUVEC-01_1_F09_s1.jpeg
44351,44351,HUVEC-01_1_F09,HUVEC-01,1,F09,5,HUVEC-01_1_F09_s2.jpeg
7845,7845,HUVEC-01_1_F21,HUVEC-01,1,F21,10,HUVEC-01_1_F21_s1.jpeg
44360,44360,HUVEC-01_1_F21,HUVEC-01,1,F21,10,HUVEC-01_1_F21_s2.jpeg
7899,7899,HUVEC-01_1_I16,HUVEC-01,1,I16,8,HUVEC-01_1_I16_s1.jpeg
44414,44414,HUVEC-01_1_I16,HUVEC-01,1,I16,8,HUVEC-01_1_I16_s2.jpeg


In [17]:
sm_test_df.head(n=10)

Unnamed: 0.1,Unnamed: 0,well_id,experiment,plate,well,filename,sirna_id
4434,8868,HUVEC-17_1_B08,HUVEC-17,1,B08,HUVEC-17_1_B08_s1.jpeg,3
24331,48662,HUVEC-17_1_B08,HUVEC-17,1,B08,HUVEC-17_1_B08_s2.jpeg,3
4536,9072,HUVEC-17_1_G12,HUVEC-17,1,G12,HUVEC-17_1_G12_s1.jpeg,1
24433,48866,HUVEC-17_1_G12,HUVEC-17,1,G12,HUVEC-17_1_G12_s2.jpeg,1
4566,9132,HUVEC-17_1_I04,HUVEC-17,1,I04,HUVEC-17_1_I04_s1.jpeg,6
24463,48926,HUVEC-17_1_I04,HUVEC-17,1,I04,HUVEC-17_1_I04_s2.jpeg,6
4578,9156,HUVEC-17_1_I16,HUVEC-17,1,I16,HUVEC-17_1_I16_s1.jpeg,8
24475,48950,HUVEC-17_1_I16,HUVEC-17,1,I16,HUVEC-17_1_I16_s2.jpeg,8
4581,9162,HUVEC-17_1_I19,HUVEC-17,1,I19,HUVEC-17_1_I19_s1.jpeg,10
24478,48956,HUVEC-17_1_I19,HUVEC-17,1,I19,HUVEC-17_1_I19_s2.jpeg,10


## Create validation subset

In [18]:
val_num = len(sm_train_df) * 0.2 # take 20% of training set

val_df = sm_train_df.sample(int(val_num), random_state = 18)
filename_list = list(val_df.filename)

# Filter dataframes by randomly sampled rows
# Take some from various experiments with same cell line
# also need to shuffle sirna labels
sm_val_df = sm_train_df[sm_train_df.filename.isin(filename_list)]
sm_train_df = sm_train_df[~sm_train_df.filename.isin(filename_list)]
sm_val_df.head()

Unnamed: 0.1,Unnamed: 0,id_code,experiment,plate,well,sirna,filename
7796,7796,HUVEC-01_1_D11,HUVEC-01,1,D11,1,HUVEC-01_1_D11_s1.jpeg
44311,44311,HUVEC-01_1_D11,HUVEC-01,1,D11,1,HUVEC-01_1_D11_s2.jpeg
44320,44320,HUVEC-01_1_D20,HUVEC-01,1,D20,3,HUVEC-01_1_D20_s2.jpeg
7845,7845,HUVEC-01_1_F21,HUVEC-01,1,F21,10,HUVEC-01_1_F21_s1.jpeg
7899,7899,HUVEC-01_1_I16,HUVEC-01,1,I16,8,HUVEC-01_1_I16_s1.jpeg


## Data preprocessing

In [19]:
# Settings
batch_size = 32
img_height = 224
img_width = 224
num_outputs = 20

In [20]:
# Add some rotation and adjustments to images

train_datagen = ImageDataGenerator(
    rescale = 1./255,
    #validation_split = 0.2, # set validation set to 0.2
    #featurewise_center= True,
    #featurewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    #height_shift_range=[-0.08, 0.08],
    #width_shift_range=[-0.08,0.08],
    #brightness_range=[0.75, 1.1]
)

test_datagen = ImageDataGenerator(
    rescale = 1./255
)

In [21]:
sm_train_df['sirna'] = sm_train_df.sirna.apply(lambda x: str(x))
sm_test_df['sirna_id'] = sm_test_df.sirna_id.apply(lambda x: str(x))
sm_val_df['sirna'] = sm_val_df.sirna.apply(lambda x: str(x))

In [22]:
train_dir = '/home/specc/Documents/school_files/458_deep_learning/458_final_project/data_files/train/'

train_generator  = train_datagen.flow_from_dataframe(
    dataframe = sm_train_df,
    directory = train_dir,
    target_size = (img_height, img_width),
    subset='training',
    x_col='filename',
    y_col='sirna',
    class_mode='categorical',
    color_mode='rgb',
    shuffle = True,
    batch_size = batch_size
)

Found 512 validated image filenames belonging to 20 classes.


In [23]:
val_generator = test_datagen.flow_from_dataframe(
    dataframe = sm_val_df,
    directory = train_dir,
    target_size = (img_height, img_width),
    #subset = 'validation',
    x_col = 'filename',
    y_col = 'sirna',
    class_mode = 'categorical',
    color_mode = 'rgb',
    shuffle = False,
    batch_size = batch_size
)

Found 128 validated image filenames belonging to 20 classes.


In [24]:
# test_dir = 'data_files/test'

# test_generator = test_datagen.flow_from_dataframe(
#     dataframe = sm_test_df,
#     directory = test_dir,
#     target_size = (img_height, img_width),
#     x_col = 'filename',
#     y_col = 'sirna_id',
#     color_mode = 'rgb',
#     #class_mode = None
# )

In [25]:
test_dir = '/home/specc/Documents/school_files/458_deep_learning/458_final_project/data_files/test/'

test_generator = test_datagen.flow_from_dataframe(
    dataframe = sm_test_df,
    directory = test_dir,
    target_size = (224, 224),
    x_col='filename',
    y_col='sirna_id',
    mode='categorical',
    color_mode='rgb'
)

Found 320 validated image filenames belonging to 20 classes.


# Modeling Experiments

In [26]:
# store results
results = list()

## 1 layer CNN

In [27]:
model = Sequential([
    Conv2D(16, 3, padding='same', activation='relu', input_shape=(224, 224 ,3)),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(20, activation='softmax')
])

model_name = 'CNN (1 layer)'

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 16)      0         
_________________________________________________________________
flatten (Flatten)            (None, 200704)            0         
_________________________________________________________________
dense (Dense)                (None, 512)               102760960 
_________________________________________________________________
dense_1 (Dense)              (None, 20)                10260     
Total params: 102,771,668
Trainable params: 102,771,668
Non-trainable params: 0
_________________________________________________________________


In [None]:
opt = 'rmsprop'
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15

## 2 layer CNN

In [None]:
model = Sequential([
    Conv2D(32, 3, activation='relu', input_shape=(224, 224 ,3)),
    MaxPooling2D(),

    Conv2D(128, 3, activation='relu', input_shape=(224, 224 ,3)),
    MaxPooling2D(),

    Flatten(),
    Dense(512, activation='relu'),
    Dense(20, activation='softmax')
])

model_name = 'CNN (2 layers)'

model.summary()

In [None]:
opt = 'rmsprop'
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

In [None]:
pd.DataFrame(results)

# Base ResNet50

In [None]:
res_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3)
)

res_model.trainable = False

flat1 = Flatten()
class1 = Dense(512, activation='relu')
output = Dense(20, activation = 'softmax')

model = Sequential([
    res_model,
    flat1,
    class1,
    output
])

model_name = 'ResNet50 baseline'

model.summary()

In [None]:
opt = 'rmsprop'
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

# Same models, accelerated learning rate

In [None]:
opt = RMSprop(learning_rate=0.01)

## 2-layer CNN

In [35]:
model = Sequential([
    Conv2D(32, 3, activation='relu', input_shape=(224, 224 ,3)),
    MaxPooling2D(),

    Conv2D(128, 3, activation='relu', input_shape=(224, 224 ,3)),
    MaxPooling2D(),

    Flatten(),
    Dense(512, activation='relu'),
    Dense(20, activation='softmax')
])

model_name = 'CNN (2 layers) - 0.01 LR'

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 222, 222, 32)      896       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 109, 109, 128)     36992     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 54, 54, 128)       0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 373248)            0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)               191103488 
_________________________________________________________________
dense_7 (Dense)              (None, 20)               

In [36]:
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


## Base ResNet50

In [37]:
res_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3)
)

res_model.trainable = False

flat1 = Flatten()
class1 = Dense(512, activation='relu')
output = Dense(20, activation = 'softmax')

model = Sequential([
    res_model,
    flat1,
    class1,
    output
])

model_name = 'ResNet50 baseline - 0.01 LR'

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 7, 7, 2048)        23587712  
_________________________________________________________________
flatten_4 (Flatten)          (None, 100352)            0         
_________________________________________________________________
dense_8 (Dense)              (None, 512)               51380736  
_________________________________________________________________
dense_9 (Dense)              (None, 20)                10260     
Total params: 74,978,708
Trainable params: 51,390,996
Non-trainable params: 23,587,712
_________________________________________________________________


In [38]:
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [39]:
pd.DataFrame(results)

Unnamed: 0,model,train_loss,train_acc,train_time,val_loss,val_acc,test_loss,test_acc,test_time
0,CNN (1 layer),2.157,0.33,188.325,2.409,0.234,2.661,0.181,0.996
1,CNN (2 layers),2.068,0.332,433.127,2.318,0.18,2.643,0.209,2.593
2,ResNet50 baseline,2.98,0.068,401.995,3.046,0.047,2.961,0.075,11.249
3,CNN (2 layers) - 0.01 LR,2.999,0.033,434.526,3.023,0.031,3.002,0.047,2.581
4,ResNet50 baseline - 0.01 LR,2.995,0.049,405.245,3.029,0.031,2.996,0.041,11.192


# Other base pretrained models

## Base InceptionV3

In [40]:
base_model = InceptionV3(include_top=False,
                    weights = 'imagenet',
                    input_shape=(img_height, img_width, 3),
                    classes = num_outputs,
                    classifier_activation='softmax')

flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    flat,
    output
])

base_model.trainable = False

model_name = 'InceptionV3 base'

In [41]:
opt = 'rmsprop'
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


## Base Xception

In [42]:
base_model = Xception(include_top= False,
                    weights = 'imagenet',
                    input_shape = (img_height, img_width, 3),
                    classes = num_outputs)

flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    flat,
    output
])

base_model.trainable = False

model_name = 'Base Xception'

In [43]:
opt = 'rmsprop'
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


## Base VGG16

In [44]:
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
    classes=num_outputs
)

flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    flat,
    output
])

base_model.trainable = False

model_name = 'Base VGG16'

In [45]:
opt = 'rmsprop'
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [46]:
pd.DataFrame(results)

Unnamed: 0,model,train_loss,train_acc,train_time,val_loss,val_acc,test_loss,test_acc,test_time
0,CNN (1 layer),2.157,0.33,188.325,2.409,0.234,2.661,0.181,0.996
1,CNN (2 layers),2.068,0.332,433.127,2.318,0.18,2.643,0.209,2.593
2,ResNet50 baseline,2.98,0.068,401.995,3.046,0.047,2.961,0.075,11.249
3,CNN (2 layers) - 0.01 LR,2.999,0.033,434.526,3.023,0.031,3.002,0.047,2.581
4,ResNet50 baseline - 0.01 LR,2.995,0.049,405.245,3.029,0.031,2.996,0.041,11.192
5,InceptionV3 base,10.032,0.438,198.577,16.704,0.289,18.8,0.281,6.34
6,Base Xception,7.567,0.475,380.097,11.698,0.336,12.736,0.306,12.422
7,Base VGG16,2.004,0.477,917.364,2.819,0.359,2.612,0.363,30.367


## Unfreeze top 25 layers of InceptionV3

In [27]:
base_model = InceptionV3(include_top=False,
                    weights = 'imagenet',
                    input_shape=(img_height, img_width, 3))

flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    flat,
    output
])

model_name = 'InceptionV3 top 25 layers trainable'

for layer in base_model.layers[:-24]:
    layer.trainable = True

In [29]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


## Unfreeze top 25 layers of Xception

In [32]:
base_model = Xception(include_top= False,
                    weights = 'imagenet',
                    input_shape = (img_height, img_width, 3))

flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    flat,
    output
])

model_name = 'Xception top 25 layers trainable'

for layer in base_model.layers[:-24]:
    layer.trainable = True

In [33]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
pd.DataFrame(results)

## Unfreeze top 25 layers of VGG16

In [None]:
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3)
)

flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    flat,
    output
])

#base_model.trainable = False

model_name = 'VGG16 top 25 layers trainable'

for layer in base_model.layers[:-24]:
    layer.trainable = True

In [None]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

# Add Global Average Pooling Layer

## Xception - Global Average Pooling

In [27]:
base_model = Xception(include_top= False,
                    weights = 'imagenet',
                    input_shape = (img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'Xception w GlobalAvgPooling'

for layer in base_model.layers:
    layer.trainable = False

In [28]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


## InceptionV3 with GlobalAvgPooling

In [29]:
base_model = InceptionV3(include_top=False,
                    weights = 'imagenet',
                    input_shape=(img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'InceptionV3 w GlobalAvgPooling'

for layer in base_model.layers:
    layer.trainable = False

In [30]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


## VGG16 with and GlobalAvgPooling

In [31]:
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3)
)

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

#base_model.trainable = False

model_name = 'VGG16 w GlobalAvgPooling'

for layer in base_model.layers:
    layer.trainable = False

In [32]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [33]:
pd.DataFrame(results)

Unnamed: 0,model,train_loss,train_acc,train_time,val_loss,val_acc,test_loss,test_acc,test_time
0,Xception w GlobalAvgPooling,1.533,0.551,382.221,1.99,0.398,2.046,0.347,12.842
1,InceptionV3 w GlobalAvgPooling,1.475,0.541,207.331,2.113,0.383,2.195,0.35,6.458
2,VGG16 w GlobalAvgPooling,2.412,0.41,950.336,2.525,0.305,2.451,0.353,31.041


## Xception with 15 trainable layers and GlobalAvgPooling

In [26]:
base_model = Xception(include_top= False,
                    weights = 'imagenet',
                    input_shape = (img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'Xception (15 trainable layers) w GlobalAvgPooling'

for layer in base_model.layers[:-15]:
    layer.trainable = True

In [27]:
opt = RMSprop(learning_rate = 0.00001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/15
Epoch 2/15

# Adjust datagen parameters

## Introduce brightness range

In [27]:
# Add some rotation and adjustments to images

train_datagen = ImageDataGenerator(
    rescale = 1./255,
    #validation_split = 0.2, # set validation set to 0.2
    featurewise_center= True,
    featurewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    #height_shift_range=[-0.08, 0.08],
    #width_shift_range=[-0.08,0.08],
    brightness_range=[0.75, 1.1]
)

test_datagen = ImageDataGenerator(
    rescale = 1./255
)

In [28]:
train_dir = '/home/specc/Documents/school_files/458_deep_learning/458_final_project/data_files/train/'

train_generator  = train_datagen.flow_from_dataframe(
    dataframe = sm_train_df,
    directory = train_dir,
    target_size = (img_height, img_width),
    subset='training',
    x_col='filename',
    y_col='sirna',
    class_mode='categorical',
    color_mode='rgb',
    shuffle = True,
    batch_size = batch_size
)

Found 512 validated image filenames belonging to 20 classes.


# Test best performing models

### Xception with GlobalAvgPooling

In [29]:
base_model = Xception(include_top= False,
                    weights = 'imagenet',
                    input_shape = (img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'Xception w GlobalAvgPooling'

for layer in base_model.layers:
    layer.trainable = False

In [30]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### InceptionV3 with GlobalAveragePooling

In [31]:
base_model = InceptionV3(include_top=False,
                    weights = 'imagenet',
                    input_shape=(img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'InceptionV3 with GlobalAvgPooling'

for layer in base_model.layers:
    layer.trainable = False

In [32]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [33]:
pd.DataFrame(results)

Unnamed: 0,model,train_loss,train_acc,train_time,val_loss,val_acc,test_loss,test_acc,test_time
0,Xception w GlobalAvgPooling,1.439,0.586,536.047,1.995,0.406,1.974,0.394,13.104
1,InceptionV3 with GlobalAvgPooling,1.365,0.557,281.862,2.073,0.43,2.224,0.378,6.828


## Introduce height shift

In [34]:
# Add some rotation and adjustments to images

train_datagen = ImageDataGenerator(
    rescale = 1./255,
    #validation_split = 0.2, # set validation set to 0.2
    #featurewise_center= True,
    #featurewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    height_shift_range=[-0.08, 0.08],
    #width_shift_range=[-0.08,0.08],
    #brightness_range=[0.75, 1.1]
)

train_generator  = train_datagen.flow_from_dataframe(
    dataframe = sm_train_df,
    directory = train_dir,
    target_size = (img_height, img_width),
    subset='training',
    x_col='filename',
    y_col='sirna',
    class_mode='categorical',
    color_mode='rgb',
    shuffle = True,
    batch_size = batch_size
)

Found 512 validated image filenames belonging to 20 classes.


## Test best models

### Xception with GlobalAvgPooling

In [35]:
base_model = Xception(include_top= False,
                    weights = 'imagenet',
                    input_shape = (img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'Xception w GlobalAvgPooling'

for layer in base_model.layers:
    layer.trainable = False

In [36]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [37]:
pd.DataFrame(results)

Unnamed: 0,model,train_loss,train_acc,train_time,val_loss,val_acc,test_loss,test_acc,test_time
0,Xception w GlobalAvgPooling,1.439,0.586,536.047,1.995,0.406,1.974,0.394,13.104
1,InceptionV3 with GlobalAvgPooling,1.365,0.557,281.862,2.073,0.43,2.224,0.378,6.828
2,Xception w GlobalAvgPooling,1.526,0.568,527.488,2.144,0.391,2.102,0.347,13.21


In [38]:
base_model = InceptionV3(include_top=False,
                    weights = 'imagenet',
                    input_shape=(img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'InceptionV3 trainable with GlobalAvgPooling'

for layer in base_model.layers:
    layer.trainable = False

In [39]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Introduce width shift

In [51]:
# Add some rotation and adjustments to images

train_datagen = ImageDataGenerator(
    rescale = 1./255,
    #validation_split = 0.2, # set validation set to 0.2
    #featurewise_center= True,
    #featurewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    #height_shift_range=[-0.08, 0.08],
    width_shift_range=[-0.08,0.08],
    #brightness_range=[0.75, 1.1]
)

train_generator  = train_datagen.flow_from_dataframe(
    dataframe = sm_train_df,
    directory = train_dir,
    target_size = (img_height, img_width),
    subset='training',
    x_col='filename',
    y_col='sirna',
    class_mode='categorical',
    color_mode='rgb',
    shuffle = True,
    batch_size = batch_size
)

Found 512 validated image filenames belonging to 20 classes.


### Xception and GlobalAvgPooling

In [52]:
base_model = Xception(include_top= False,
                    weights = 'imagenet',
                    input_shape = (img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'Xception w GlobalAvgPooling & width shift'

for layer in base_model.layers:
    layer.trainable = False

In [53]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [54]:
model.save('xception.h5')

### InceptionV3 with GlobalAvgPooling

In [55]:
base_model = InceptionV3(include_top=False,
                    weights = 'imagenet',
                    input_shape=(img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'InceptionV3 w GlobalAvgPooling & width shift'

for layer in base_model.layers:
    layer.trainable = False

In [56]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [57]:
model.save('inception.h5')

In [58]:
pd.DataFrame(results)

Unnamed: 0,model,train_loss,train_acc,train_time,val_loss,val_acc,test_loss,test_acc,test_time
0,Xception w GlobalAvgPooling,1.439,0.586,536.047,1.995,0.406,1.974,0.394,13.104
1,InceptionV3 with GlobalAvgPooling,1.365,0.557,281.862,2.073,0.43,2.224,0.378,6.828
2,Xception w GlobalAvgPooling,1.526,0.568,527.488,2.144,0.391,2.102,0.347,13.21
3,InceptionV3 trainable with GlobalAvgPooling,1.404,0.566,280.952,2.123,0.406,2.177,0.369,6.589
4,Xception w GlobalAvgPooling & width shift,1.484,0.557,525.483,1.982,0.406,2.035,0.397,13.099
5,InceptionV3 w GlobalAvgPooling & width shift,1.366,0.562,280.24,2.035,0.422,2.099,0.409,6.617
6,InceptionV3 w GlobalAvgPooling & width shift,1.278,0.605,277.146,2.159,0.422,2.179,0.369,6.578
7,Xception w GlobalAvgPooling & brightness shift,1.382,0.605,517.894,1.916,0.422,2.017,0.384,12.895
8,Xception w GlobalAvgPooling & width shift,1.49,0.549,534.56,2.064,0.367,2.045,0.403,12.887
9,InceptionV3 w GlobalAvgPooling & width shift,1.426,0.551,281.818,2.184,0.398,2.146,0.378,6.976


# Brightness shift

In [46]:
# Add some rotation and adjustments to images

train_datagen = ImageDataGenerator(
    rescale = 1./255,
    #validation_split = 0.2, # set validation set to 0.2
    #featurewise_center= True,
    #featurewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    #height_shift_range=[-0.08, 0.08],
    #width_shift_range=[-0.08,0.08],
    brightness_range=[0.90, 1.1]
)

train_generator  = train_datagen.flow_from_dataframe(
    dataframe = sm_train_df,
    directory = train_dir,
    target_size = (img_height, img_width),
    subset='training',
    x_col='filename',
    y_col='sirna',
    class_mode='categorical',
    color_mode='rgb',
    shuffle = True,
    batch_size = batch_size
)

Found 512 validated image filenames belonging to 20 classes.


## InceptionV3 with GlobalAvgPooling

In [47]:
base_model = InceptionV3(include_top=False,
                    weights = 'imagenet',
                    input_shape=(img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'InceptionV3 w GlobalAvgPooling & brightness shift'

for layer in base_model.layers:
    layer.trainable = False

In [48]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Xception with GlobalAvgPooling

In [49]:
base_model = Xception(include_top= False,
                    weights = 'imagenet',
                    input_shape = (img_height, img_width, 3))

pooling = GlobalAveragePooling2D()
flat = Flatten()
output = Dense(20, activation='softmax')

model = Sequential([
    base_model,
    pooling,
    flat,
    output
])

model_name = 'Xception w GlobalAvgPooling & brightness shift'

for layer in base_model.layers:
    layer.trainable = False

In [50]:
opt = RMSprop(learning_rate = 0.0001, momentum = 0.9)
train_data, history_dict = train_model(model, opt, train_generator, val_generator, model_name)
test_data = test_model(model, test_generator)
results.append(save_model_data(train_data, test_data))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
