## Imports

In [1]:
# Make the notebook's parent directory importable so that project code
# living one level up can be imported from this notebook.
import os, sys
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
    
# core imports
from ohmeow_ml.keras_tf_util import *

# configure matplotlib
%matplotlib inline
    
# configure autoreload to re-load changed modules
%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


## Define paths and global variables

In [2]:
# Root locations. Every directory string below ends with '/', because the
# rest of the notebook builds file names by plain string concatenation.
current_dir = os.getcwd()
DATA_HOME_DIR = current_dir + '/data/'

# Class names are the sub-directory names of the full training set.
# Keep real, non-hidden directories only (skips stray entries such as
# .DS_Store or .ipynb_checkpoints) and sort them: os.listdir() returns entries
# in arbitrary order, whereas a sorted list is deterministic and lines up with
# the alphanumeric class ordering Keras' flow_from_directory is documented to use.
DATA_CLASSES = sorted(
    name for name in os.listdir(DATA_HOME_DIR + 'train')
    if not name.startswith('.')
    and os.path.isdir(os.path.join(DATA_HOME_DIR, 'train', name))
)

# Switch between the full data set and the small sample by toggling these two lines.
# path = DATA_HOME_DIR
path = DATA_HOME_DIR + 'sample/'

train_path = path + 'train/'
val_path = path + 'valid/'
test_path = DATA_HOME_DIR + 'test/'   # the test set always comes from the full data directory

models_path = path + 'models/'                      # save weights here
results_path = path + 'results/'                    # save predictions here
# NOTE: 'preprocesed_data' is misspelled, but the name is kept as-is so that
# caches already written to disk are still found.
processed_data_path = path + 'preprocesed_data/'    # save preprocessed data used for training here

# Create the output directories on first use.
for out_dir in (models_path, results_path, processed_data_path):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

In [3]:
# Number of samples per batch, passed to model.predict() further down.
# Currently 4; the trailing note records 64 as the alternative setting.
batch_size = 4 #64

## Preprocess the data

We can save time by pre-processing the images (e.g., converting them to JPEGs and resizing them to 224x224) and saving them as numpy arrays on the file system. We can do the same for the class designations, filenames, and one-hot encoded labels of the train, validation, and test images.

In [4]:
# Get classes, one-hot encoded labels, and filenames for each data split.
# get_batch_info is not defined in this notebook (presumably it comes from the
# star import at the top); as unpacked here it yields three items in the order
# (classes, labels, filenames).
train_classes, train_labels, train_filenames = get_batch_info(train_path)
val_classes, val_labels, val_filenames = get_batch_info(val_path)
# Only the filenames (third item) are kept for the test set.
test_filenames = get_batch_info(test_path)[2]

Found 1000 images belonging to 10 classes.
Found 300 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


In [5]:
# get image data
def _cached_image_data(cache_name, image_dir):
    """Return the image array for ``image_dir``, using an on-disk cache.

    The cache file lives at ``processed_data_path + cache_name``. The same
    location is used for the existence check, for saving and for loading
    (previously the load looked in ``path`` instead, so every run after the
    first one failed to find the file it had just been told exists).

    cache_name -- file name of the cached array, e.g. 'train_data.bc'
    image_dir  -- directory handed to get_data() when the cache is missing
    """
    cache_file = processed_data_path + cache_name
    if os.path.exists(cache_file):
        return load_array(cache_file)

    # Cache miss: build the array from the images and persist it for next time.
    data = get_data(image_dir)
    save_array(cache_file, data)
    return data


train_data = _cached_image_data('train_data.bc', train_path)
val_data = _cached_image_data('val_data.bc', val_path)
test_data = _cached_image_data('test_data.bc', test_path)

Found 1000 images belonging to 10 classes.
Found 300 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


## Fine-tune and Train VGG19

### Option 1: Precompute output from VGG's 2nd to last layer and use it to train a linear classifier.

In [9]:
# limit_mem() is a project helper not defined in this notebook; presumably it
# constrains GPU memory usage before a model is built — TODO confirm.
limit_mem()
# Load VGG19 with ImageNet weights, keeping the top (include_top=True) so the
# layers just below the final one are available for the steps that follow.
model = VGG19(weights='imagenet', include_top=True)
# model.summary()

#### Pop the last layer and set model.outputs to the output of what was previously the 2nd-to-last layer

In [10]:
# Remove the final layer from the model's layer list.
model.layers.pop()

# model.layers[-1].outbound_nodes = [] ... this is not needed
# Point the model's outputs at the layer that is now last, so predictions
# come from it instead of from the removed layer.
# NOTE(review): this relies on the installed Keras version honoring manual
# edits to model.layers / model.outputs — verify after any Keras upgrade.
model.outputs = [model.layers[-1].output]

In [14]:
# model.summary()

#### Precompute output for the updated model

In [15]:
# Cache files for the truncated model's outputs ("features") on each split.
# NOTE: a train_features_ft.bc written by the earlier version of this cell
# contains the raw image array rather than features — delete it once so that
# it gets regenerated.
train_features_file = processed_data_path + 'train_features_ft.bc'
val_features_file = processed_data_path + 'val_features_ft.bc'

# Only trust the cache when BOTH files are present; otherwise recompute both.
if os.path.exists(train_features_file) and os.path.exists(val_features_file):
    train_features_ft = load_array(train_features_file)
    val_features_ft = load_array(val_features_file)
else:
    train_features_ft = model.predict(train_data, batch_size=batch_size)
    val_features_ft = model.predict(val_data, batch_size=batch_size)

    # Persist the computed features (not the input images) for later runs.
    save_array(train_features_file, train_features_ft)
    save_array(val_features_file, val_features_ft)

# Sanity check: one row per image, one column per output unit.
print(train_features_ft.shape)
print(val_features_ft.shape)

(1000, 4096)
(300, 4096)


#### Train a linear classifier using the precomputed features

### Option 2: Train model after replacing last layer with a Dense layer having 10 outputs

In [None]:
# limit_mem() is a project helper not defined in this notebook; presumably it
# constrains GPU memory usage before a model is built — TODO confirm.
limit_mem()
# Start again from a fresh VGG19 with ImageNet weights and its top included,
# replacing the model object modified earlier in the notebook.
model = VGG19(weights='imagenet', include_top=True)
# model.summary()

In [None]:
# finetune() is a project helper not defined in this notebook; going by the
# section heading it presumably swaps the last layer for a Dense layer with
# the given number of outputs — TODO confirm.
# 10 matches the "10 classes" reported when the train/validation data was read.
model = finetune(model, 10)
# model.summary()