## Imports

In [1]:
# adds parent directory to python path so we can access code located there
import os, sys
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path: sys.path.append(nb_dir)
    
# core imports
from ohmeow_ml.keras_tf_util import *

# configure matplotlib
%matplotlib inline
    
# configure autoreload to re-load changed modules
%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


## Define paths and global variables

In [2]:
current_dir = os.getcwd()
DATA_HOME_DIR = current_dir + '/data/'
DATA_CLASSES = [ dir for dir in os.listdir(DATA_HOME_DIR+'train') ]

# path = DATA_HOME_DIR
path = DATA_HOME_DIR + 'sample/'
sample_path = DATA_HOME_DIR + 'sample/'

train_path = path + 'train/'
val_path = path + 'valid/'
test_path = path + 'test/'

models_path = path + 'models/'                      # save weights here
results_path = path + 'results/'                    # save predictions here
processed_data_path = path + 'preprocesed_data/'    # save preprocessed data used for training here

if not os.path.exists(models_path): os.makedirs(models_path)
if not os.path.exists(results_path): os.makedirs(results_path)
if not os.path.exists(processed_data_path): os.makedirs(processed_data_path)

In [3]:
batch_size = 4 #64

## Preprocess the data

We can save time by pre-processing the images (e.g., converting them to jpegs, resizing to 224x224) and saving them as a numpy array on the file system.  We can do the same for the train, validation, and test image class designations, filenames, and one-hot encoded labels

In [4]:
# get classes, one-hot encoded labels, and filenames
train_classes, train_labels, train_filenames = get_batch_info(train_path)
val_classes, val_labels, val_filenames = get_batch_info(val_path)
test_filenames = get_batch_info(test_path)[2]

Found 1500 images belonging to 10 classes.
Found 750 images belonging to 10 classes.
Found 500 images belonging to 1 classes.


In [5]:
 # get image data
if not os.path.exists(processed_data_path+'train_data.bc'):
    train_data = get_data(train_path)
    save_array(processed_data_path+'train_data.bc', train_data)
else:
    train_data = load_array(processed_data_path+'train_data.bc')
    print('training data loaded ...')

if not os.path.exists(processed_data_path+'val_data.bc'):
    val_data = get_data(val_path)
    save_array(processed_data_path+'val_data.bc', val_data)
else:
    val_data = load_array(processed_data_path+'val_data.bc')
    print('validation data loaded ...')

if not os.path.exists(processed_data_path+'test_data.bc'):
    test_data = get_data(test_path)
    save_array(processed_data_path+'test_data.bc', test_data)
else:
    test_data = load_array(processed_data_path+'test_data.bc')
    print('test data loaded ...')

training data loaded ...
validation data loaded ...
test data loaded ...


Create training/validation batches and also define "steps per epoch" for each ... defines the # of batches per epoch (see `model.fit_generator()`).

***ONLY RUN THIS CODE IF YOU NEED TO USE BATCHES INSTEAD OF PERSISTED IMAGE ARRAYS***

In [6]:
# OPTION 1: BUILD BATCHES FROM FILE SYSTEM
# train_batches = get_batches(train_path, batch_size=batch_size)
# val_batches = get_batches(val_path, batch_size=batch_size*2, shuffle=False)

# OPTION 2: BUILD BATCHES FROM IMAGE ARRAYS
# gen = image.ImageDataGenerator()
# train_batches = gen.flow(train_data, train_labels, batch_size=batch_size, shuffle=True)
# val_batches = gen.flow(val_data, val_labels, batch_size=batch_size*2, shuffle=False)

# DEFINE # OF STEPS TO TAKE IN FITTING BATCHES FOR BOTH TRAINING AND VALIDATION EXAMPLES
# epoch_steps = math.ceil(train_batches.n/train_batches.batch_size)
# val_steps = math.ceil(val_batches.n/val_batches.batch_size)

## Simple CNN

2 conv layers with max pooling + a simple dense network is a good simple CNN to start with

In [7]:
def simple_cnn():
    model = Sequential([
        BatchNormalization(axis=1, input_shape=(224,224,3)),
        Conv2D(32, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Flatten(),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
    ])

    model.compile(Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

### Without Data Augmentation

In [8]:
limit_mem()
model = simple_cnn()

In [9]:
model.fit(train_data, train_labels, batch_size=batch_size, epochs=2, shuffle=True, 
          validation_data=(val_data, val_labels), verbose=2)

Train on 1500 samples, validate on 750 samples
Epoch 1/2
61s - loss: 1.9962 - acc: 0.3373 - val_loss: 2.3012 - val_acc: 0.2800
Epoch 2/2
59s - loss: 1.1325 - acc: 0.6647 - val_loss: 2.5793 - val_acc: 0.2627


<keras.callbacks.History at 0x1ead3b81e80>

In [10]:
model.optimizer.lr = 0.001
model.fit(train_data, train_labels, batch_size=batch_size, epochs=5, shuffle=True, 
          validation_data=(val_data, val_labels), verbose=2)

Train on 1500 samples, validate on 750 samples
Epoch 1/5
59s - loss: 0.7972 - acc: 0.8013 - val_loss: 2.0110 - val_acc: 0.3547
Epoch 2/5
59s - loss: 0.5998 - acc: 0.8620 - val_loss: 2.0129 - val_acc: 0.3907
Epoch 3/5
59s - loss: 0.4617 - acc: 0.9140 - val_loss: 1.8635 - val_acc: 0.4227
Epoch 4/5
59s - loss: 0.4065 - acc: 0.9187 - val_loss: 1.9141 - val_acc: 0.4013
Epoch 5/5
59s - loss: 0.3253 - acc: 0.9407 - val_loss: 2.0225 - val_acc: 0.4027


<keras.callbacks.History at 0x1ead3bac208>

### With Data Augmentation

In [11]:
# get the best values
df_augs = pd.read_csv(sample_path+'data_augmentation_results.csv')
df_augs.sort_values('val_acc', ascending=False).groupby('aug').first()

Unnamed: 0_level_0,aug_val,train_loss,train_acc,val_loss,val_acc
aug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
channel_shift_range,0.0,0.392873,0.926667,2.012688,0.413778
height_shift_range,0.0,0.403766,0.918444,1.865438,0.432889
rotation_range,0.0,0.379833,0.926,1.768067,0.437778
shear_range,0.1,0.540318,0.870222,1.913431,0.453333
width_shift_range,0.05,0.853956,0.752667,2.134572,0.371111
zoom_range,0.0,0.372285,0.927111,2.034861,0.42


In [12]:
gen_aug = image.ImageDataGenerator(channel_shift_range=0.0, height_shift_range=0.0, rotation_range=0.0, 
                                   shear_range=0.10, width_shift_range=0.05, zoom_range=0.0)

aug_batches = gen_aug.flow(train_data, train_labels, batch_size=batch_size, shuffle=True)

In [13]:
limit_mem()
model = simple_cnn()

In [14]:
epoch_steps = math.ceil(aug_batches.n/aug_batches.batch_size)
model.fit_generator(aug_batches, epoch_steps, epochs=2, validation_data=(val_data, val_labels), verbose=2)

Epoch 1/2
60s - loss: 2.2428 - acc: 0.2433 - val_loss: 2.4668 - val_acc: 0.1587
Epoch 2/2
59s - loss: 1.6098 - acc: 0.4487 - val_loss: 2.2202 - val_acc: 0.3160


<keras.callbacks.History at 0x1eaf68fef60>

In [15]:
model.optimizer.lr = 0.001
model.fit_generator(aug_batches, epoch_steps, epochs=4, validation_data=(val_data, val_labels), verbose=2)

Epoch 1/4
59s - loss: 1.3402 - acc: 0.5620 - val_loss: 2.1860 - val_acc: 0.3480
Epoch 2/4
59s - loss: 1.1932 - acc: 0.6207 - val_loss: 2.1291 - val_acc: 0.3587
Epoch 3/4
59s - loss: 1.0703 - acc: 0.6740 - val_loss: 2.1869 - val_acc: 0.3413
Epoch 4/4
59s - loss: 0.9866 - acc: 0.6987 - val_loss: 1.9671 - val_acc: 0.4467


<keras.callbacks.History at 0x1ead3f856d8>

In [16]:
model.optimizer.lr = 0.0001
model.fit_generator(aug_batches, epoch_steps, epochs=15, validation_data=(val_data, val_labels), verbose=2)

Epoch 1/15
59s - loss: 0.8422 - acc: 0.7500 - val_loss: 1.9100 - val_acc: 0.4267
Epoch 2/15
59s - loss: 0.8300 - acc: 0.7587 - val_loss: 1.9486 - val_acc: 0.4080
Epoch 3/15
59s - loss: 0.7511 - acc: 0.7793 - val_loss: 1.8466 - val_acc: 0.4453
Epoch 4/15
59s - loss: 0.7145 - acc: 0.7960 - val_loss: 1.8369 - val_acc: 0.4627
Epoch 5/15
59s - loss: 0.6332 - acc: 0.8220 - val_loss: 1.9578 - val_acc: 0.4467
Epoch 6/15
59s - loss: 0.6237 - acc: 0.8280 - val_loss: 2.0271 - val_acc: 0.4213
Epoch 7/15
59s - loss: 0.6231 - acc: 0.8187 - val_loss: 1.9597 - val_acc: 0.4613
Epoch 8/15
59s - loss: 0.5779 - acc: 0.8467 - val_loss: 1.9437 - val_acc: 0.4400
Epoch 9/15
59s - loss: 0.5020 - acc: 0.8527 - val_loss: 2.0274 - val_acc: 0.4373
Epoch 10/15
59s - loss: 0.5151 - acc: 0.8607 - val_loss: 2.0055 - val_acc: 0.4707
Epoch 11/15
59s - loss: 0.4504 - acc: 0.8800 - val_loss: 2.0425 - val_acc: 0.4813
Epoch 12/15
59s - loss: 0.4357 - acc: 0.8840 - val_loss: 1.9533 - val_acc: 0.4947
Epoch 13/15
59s - loss: 0

<keras.callbacks.History at 0x1eaf92a7cf8>

## Complex CNN Architecutre

We are adding in regularization via Dropout so this will work better on full data set

In [24]:
def complex_cnn(p_do=0.5, n_dense_outputs=256):
    model = Sequential([
        BatchNormalization(axis=1, input_shape=(224,224,3)),
        Conv2D(32, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Conv2D(128, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        
        Flatten(),
        Dense(n_dense_outputs, activation='relu'),
        BatchNormalization(),
        Dropout(p_do/2),
        Dense(n_dense_outputs, activation='relu'),
        BatchNormalization(),
        Dropout(p_do),
        Dense(10, activation='softmax')
    ])

    model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [25]:
limit_mem()
model = complex_cnn()

In [26]:
gen_aug = image.ImageDataGenerator(channel_shift_range=0.0, height_shift_range=0.0, rotation_range=0.0, 
                                   shear_range=0.10, width_shift_range=0.05, zoom_range=0.0)

aug_batches = gen_aug.flow(train_data, train_labels, batch_size=batch_size, shuffle=True)
epoch_steps = math.ceil(aug_batches.n/aug_batches.batch_size)

In [27]:
model.fit_generator(aug_batches, epoch_steps, epochs=2, validation_data=(val_data, val_labels), verbose=2)

Epoch 1/2
59s - loss: 3.1382 - acc: 0.1027 - val_loss: 3.1901 - val_acc: 0.1200
Epoch 2/2
55s - loss: 2.9258 - acc: 0.1433 - val_loss: 3.3729 - val_acc: 0.1493


<keras.callbacks.History at 0x1eb019dcac8>

In [28]:
model.optimizer.lr = 0.001
model.fit_generator(aug_batches, epoch_steps, epochs=5, validation_data=(val_data, val_labels), verbose=2)

Epoch 1/5
56s - loss: 2.7414 - acc: 0.1533 - val_loss: 3.3178 - val_acc: 0.1520
Epoch 2/5
55s - loss: 2.6023 - acc: 0.1793 - val_loss: 3.0626 - val_acc: 0.1787
Epoch 3/5
55s - loss: 2.4840 - acc: 0.1973 - val_loss: 2.7957 - val_acc: 0.1720
Epoch 4/5
55s - loss: 2.4386 - acc: 0.2047 - val_loss: 2.7143 - val_acc: 0.1773
Epoch 5/5
55s - loss: 2.4602 - acc: 0.2087 - val_loss: 2.6656 - val_acc: 0.1960


<keras.callbacks.History at 0x1eb01ed9eb8>

In [29]:
model.optimizer.lr = 0.00001
model.fit_generator(aug_batches, epoch_steps, epochs=10, validation_data=(val_data, val_labels), verbose=2)

Epoch 1/10
56s - loss: 2.3549 - acc: 0.2387 - val_loss: 2.5904 - val_acc: 0.2080
Epoch 2/10
55s - loss: 2.2514 - acc: 0.2540 - val_loss: 2.6188 - val_acc: 0.2147
Epoch 3/10
55s - loss: 2.1452 - acc: 0.2940 - val_loss: 2.6114 - val_acc: 0.2187
Epoch 4/10


KeyboardInterrupt: 

## Experiments

### Pre-compute output from various layers to use as input in various experiments

#### 1. Pre-compute output from VGG's 2nd to last layer

In [None]:
limit_mem()
model = VGG19(weights='imagenet', include_top=True)

In [None]:
# pop last layer and set model.outputs = to that of the now last layer
model.layers.pop()

# model.layers[-1].outbound_nodes = [] ... this is not needed
model.outputs = [model.layers[-1].output]

In [None]:
# model.summary()

In [None]:
if not os.path.exists(processed_data_path+'train_features_ft_2nd_to_ll.bc'):
    train_features_ft = model.predict(train_data, 4)
    val_features_ft = model.predict(val_data, 4)
    
    save_array(processed_data_path+'train_features_ft_2nd_to_ll.bc', train_features_ft)
    save_array(processed_data_path+'val_features_ft_2nd_to_ll.bc', val_features_ft)
else:
    train_features_ft = load_array(processed_data_path+'train_features_ft_2nd_to_ll.bc')
    val_features_ft = load_array(processed_data_path+'val_features_ft_2nd_to_ll.bc')
    
print(train_features_ft.shape)
print(val_features_ft.shape)

#### 2. Pre-compute output from convolutional layers

In [None]:
limit_mem()
model = VGG19(include_top=False, weights='imagenet')

In [None]:
# model.summary()

In [None]:
if not os.path.exists(processed_data_path+'train_features_ft_conv.bc'):
    train_features_ft = model.predict(train_data, 4)
    val_features_ft = model.predict(val_data, 4)
    
    save_array(processed_data_path+'train_features_ft_conv.bc', train_features_ft)
    save_array(processed_data_path+'val_features_ft_conv.bc', val_features_ft)
else:
    train_features_ft = load_array(processed_data_path+'train_features_ft_conv.bc')
    val_features_ft = load_array(processed_data_path+'val_features_ft_conv.bc')
    
print(train_features_ft.shape)
print(val_features_ft.shape)

### 1. Train a linear classifier using the pre-computed output from 2nd to last layer

In [None]:
limit_mem()
model = VGG19(include_top=True, weights='imagenet')
model.layers.pop()
model.outputs = [model.layers[-1].output]

train_features_ft = load_array(processed_data_path+'train_features_ft_2nd_to_ll.bc')
val_features_ft = load_array(processed_data_path+'val_features_ft_2nd_to_ll.bc')

In [None]:
def build_lm_from_vgg_2ll():
    m = Sequential([
        Dense(10, activation='softmax', input_shape = model.layers[-1].output_shape[1:])
    ])
    
    m.compile(optimizer=Adam(lr=1e-05), loss='categorical_crossentropy', metrics=['accuracy'])
    return m

In [None]:
lm = build_lm_from_vgg_2ll()

In [None]:
lm.fit(train_features_ft, train_labels, batch_size=batch_size, epochs=12, shuffle=True,
       validation_data=(val_features_ft, val_labels), verbose=2)

In [None]:
lm.optimizer.lr = 0.01

### Option 2: Train model after replacing last layer with a Dense layer having 10 outputs

In [None]:
limit_mem()
model = VGG19(weights='imagenet', include_top=True)
# model.summary()

In [None]:
model = finetune(model, 10)
# model.summary()