In [None]:
import tensorflow as tf
from keras import backend as K

# Limit this process to 15% of the GPU memory so the card can be shared.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.15
session = tf.Session(config=config)

# The models in this notebook are built with the standalone `keras` package,
# so the session has to be registered with *its* backend. Registering it with
# `tf.keras.backend` would leave standalone Keras to create its own session.
K.set_session(session)

In [None]:
import numpy as np
import os
from keras.models import Sequential, Model
from keras.layers import Conv1D, Flatten, Dense, Activation, MaxPooling1D, Cropping1D, ZeroPadding1D, Input, multiply, Dropout
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint
from keras.models import model_from_json
from sklearn.metrics import classification_report
from random import shuffle
from keras import backend as K
from tqdm import tqdm

In [None]:
def pretrained_model():
    """Build the 4-class classifier on top of the frozen convolution stack.

    Architecture:
      * input of shape (251, 2), zero-padded by 3 steps in front and 2 at the
        end (251 + 3 + 2 = 256 steps),
      * three Conv1D + MaxPooling1D blocks named 'Convolution1..3' /
        'Pooling1..3'; the convolutions are marked non-trainable so that
        weights copied into them later are not changed by training,
      * a trainable 128-unit dense layer ('Dense1') followed by ReLU,
      * a 4-way softmax output.

    The model summary is printed as a side effect.

    Returns:
        The uncompiled Keras ``Model``.
    """
    inputs = Input(shape=(251, 2))

    features = ZeroPadding1D((3, 2), input_shape=(251, 2), name='Padding')(inputs)

    # (filters, kernel_size) of each frozen convolution block, in order.
    conv_blocks = [(16, 15), (32, 3), (64, 3)]
    for index, (filters, kernel_size) in enumerate(conv_blocks, start=1):
        conv = Conv1D(filters=filters, kernel_size=kernel_size, activation='relu',
                      padding='same', name='Convolution%d' % index)
        conv.trainable = False
        features = conv(features)
        features = MaxPooling1D(strides=2, pool_size=2, padding='same',
                                name='Pooling%d' % index)(features)

    flat = Flatten()(features)

    dense = Dense(units=128, name='Dense1')
    dense.trainable = True
    hidden = Activation('relu')(dense(flat))

    output = Dense(4, activation='softmax')(hidden)

    # Keras 2 names these keyword arguments `inputs` / `outputs`; the singular
    # `input` / `output` spellings are the deprecated Keras 1 names.
    model = Model(inputs=[inputs], outputs=output)

    model.summary()
    return model

In [None]:
def load_pretrained_weights(model, path_to_weights, path_to_architecture):
    """Copy the first three convolution layers of a saved network into ``model``.

    Args:
        model: Network containing layers named 'Convolution1', 'Convolution2'
            and 'Convolution3'.
        path_to_weights: Weight file of the saved network.
        path_to_architecture: JSON file describing the saved network.

    Returns:
        The same ``model`` object, with the three layers overwritten.
    """
    # Rebuild the saved network from its JSON description and restore its weights.
    with open(path_to_architecture, 'r') as architecture_file:
        architecture_json = architecture_file.read()
    donor = model_from_json(architecture_json)
    donor.load_weights(path_to_weights)

    # Source layer in the saved network -> destination layer in `model`.
    layer_pairs = (
        ('conv1d_1', 'Convolution1'),
        ('conv1d_2', 'Convolution2'),
        ('conv1d_3', 'Convolution3'),
    )

    # Read every source layer first, so nothing is written to `model` if one
    # of the lookups fails.
    extracted = [donor.get_layer(source).get_weights() for source, _ in layer_pairs]

    for (_, destination), weights in zip(layer_pairs, extracted):
        model.get_layer(destination).set_weights(weights)

    return model

In [None]:
# Build the classifier, copy the pretrained convolution weights into it and
# compile it for training. pretrained_model() already prints the layer
# summary, so it is not printed a second time here.
# NOTE(review): 'path_to_weights' / 'path_to_architecture' look like
# placeholders — replace them with the real file locations.
model = pretrained_model()
model = load_pretrained_weights(model, 'path_to_weights', 'path_to_architecture')
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

Here we define all data folders, which allows us to program an automated cross-validation routine, and we also search for all the data folders so that they can be loaded during training.

In [None]:
# Stimulus names; the training loop holds each one out once as validation set.
# NOTE(review): this list has 13 entries, while the accompanying text speaks
# of 18 folds — confirm which is intended.
data_names = [
    'doves',
    'ducks_boat',
    'ducks_children',
    'golf',
    'holsten_gate',
    'koenigstrasse',
    'puppies',
    'roundabout',
    'sea',
    'st_petri_gate',
    'st_petri_market',
    'st_petri_mcdonalds',
    'street',
]

# Root directory that holds one sub-directory per data folder.
data_path = '/bigpool/strohmfn/gazecom_processed/train'

# Names of all immediate sub-directories of data_path.
subfolders = []
for entry in os.scandir(data_path):
    if entry.is_dir():
        subfolders.append(entry.name)

We now perform 18-fold cross-validation and therefore use the data of all but one stimulus for training and the remaining one for testing. As the data is too large for most systems, we load ten files at a time, train on them, load the next ten, and so on. The order in which the files are loaded and the data inside the files are shuffled each epoch. After each training phase (10 files) we evaluate the performance of the network on the remaining validation set and always save the model with the lowest validation loss. After n epochs of training we evaluate the performance of the latest and the best-performing model and save a classification report to file.

In [None]:
# Training settings used by the cross-validation loop below.
export_name = 'deep_CNN'  # tag embedded in the names of saved weights/reports
epochs = 5                # passes over all data folders per validation set
batch_size = 128          # mini-batch size handed to model.fit

In [None]:
# Number of data folders that are loaded and trained on in one go.
FILES_PER_CHUNK = 10
TARGET_NAMES = ['Fixation', 'Saccade', 'Smooth Pursuit', 'Noise']


def _load_folder(folder):
    """Return (samples, labels) stored as <folder>.npy / <folder>_labels.npy."""
    base = os.path.join(data_path, folder, folder)
    return np.load(base + ".npy"), np.load(base + "_labels.npy")


def _write_report(tag, val_set, x_validate, true_classes):
    """Restore the `tag` ('latest' or 'best') weights of a fold and save its report."""
    prefix = tag + '_model_' + export_name + '_' + val_set
    model.load_weights(prefix + '.h5')
    predicted_classes = np.argmax(model.predict(x_validate), axis=1)
    report = classification_report(true_classes, predicted_classes,
                                   digits=5, target_names=TARGET_NAMES)
    # Context manager guarantees the file is closed even if the write fails.
    with open(prefix + '.txt', "w") as text_file:
        text_file.write(report)


# Snapshot of the freshly initialised model. Every fold restarts from it;
# otherwise the model would carry over what it learned in earlier folds, where
# the current validation stimulus was part of the training data.
# Run this cell directly after the model-construction cell so the snapshot
# really holds untrained weights.
initial_weights = model.get_weights()

for val_set in data_names:
    # Fresh weights and a fresh optimizer state for every fold.
    model.set_weights(initial_weights)
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

    # Any finite validation loss beats the initial value.
    best_val_loss = np.inf

    x_validate, y_validate = _load_folder(val_set)

    print("Evaluation Set: " + val_set)

    for e in range(epochs):
        print("EPOCH: " + str(e+1))

        shuffle(subfolders)

        with tqdm(total=len(subfolders)) as pbar:
            for i in range(0, len(subfolders), FILES_PER_CHUNK):
                chunk = subfolders[i:i + FILES_PER_CHUNK]
                pbar.update(len(chunk))

                # Folders whose name contains the validation stimulus are held out.
                train_folders = [name for name in chunk if val_set not in name]
                if not train_folders:
                    # Nothing to train on in this chunk. Reading ahead into the
                    # next chunk would train on those files twice, or run off
                    # the end of the list.
                    continue

                loaded = [_load_folder(name) for name in train_folders]
                x_train = np.concatenate([samples for samples, _ in loaded])
                y_train = np.concatenate([labels for _, labels in loaded])
                del loaded  # release the per-folder arrays before training

                model.fit(x_train, y_train,
                          batch_size=batch_size,
                          verbose=1,
                          shuffle=True)

                score = model.evaluate(x_validate, y_validate, verbose=0)
                print(score[0])
                print(score[1])
                if score[0] < best_val_loss:
                    best_val_loss = score[0]
                    model.save_weights('best_model_' + export_name + '_' + val_set + '.h5')
    model.save_weights('latest_model_' + export_name + '_' + val_set + '.h5')

    Y_validate = np.argmax(y_validate, axis=1) # Convert one-hot to index

    _write_report('latest', val_set, x_validate, Y_validate)
    _write_report('best', val_set, x_validate, Y_validate)

