In [None]:
import tensorflow as tf
# Limit this process to 15% of the GPU memory so that the card can be shared
# with other jobs, then register the resulting session with tf.keras.
# NOTE(review): the network further down is built with the standalone `keras`
# package, while the session is registered with `tf.keras` -- confirm that both
# backends share this session in the installed versions.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.15)
)
session = tf.Session(config=config)
tf.keras.backend.set_session(session)

In [None]:
import numpy as np
import os
from keras.models import Model, Sequential
from keras.layers import Conv1D, Flatten, Dense, Activation, MaxPooling1D, Input, Dense, multiply, Dropout
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam, SGD
from sklearn.metrics import classification_report
from random import shuffle
from tqdm import tqdm

In [None]:
def ACNN_model(input_shape=(251, 2), num_classes=4):
    """Build the (uncompiled) 1-D CNN with a multiplicative attention branch.

    Architecture, in order:
      1. Conv1D(16 filters, kernel 15, 'same') -> ReLU -> MaxPooling1D(2, stride 2)
      2. attention branch: Conv1D(16, 15, 'same') with a softmax activation,
         multiplied element-wise with the pooled features of step 1
      3. Flatten -> Dropout(0.2) -> Dense(32) -> ReLU -> Dropout(0.5)
      4. Dense(num_classes) with softmax

    Batch normalisation is not applied anywhere (the corresponding calls were
    disabled in the original notebook).

    Args:
        input_shape: shape of one sample without the batch axis. Defaults to
            ``(251, 2)``, the value that used to be hard-coded.
        num_classes: number of output units of the final softmax layer.
            Defaults to ``4``, the value that used to be hard-coded.

    Returns:
        A ``keras.models.Model`` mapping the input tensor to the class
        probabilities. The caller is responsible for compiling it.
    """
    inputs = Input(shape=input_shape)

    # Feature extractor.
    conv1 = Conv1D(filters=16, kernel_size=15, padding='same')(inputs)
    conv1 = Activation('relu')(conv1)
    conv1 = MaxPooling1D(strides=2, pool_size=2, padding='same')(conv1)

    # Attention weights computed from the pooled features and used to gate them.
    attention = Conv1D(filters=16, kernel_size=15, padding='same', activation='softmax')(conv1)
    # NOTE: softmax outputs are never negative, so this ReLU leaves the values
    # unchanged; it is kept so the layer structure matches the original model.
    attention = Activation('relu')(attention)
    attention = multiply([conv1, attention])

    flat = Flatten()(attention)
    dropout1 = Dropout(0.2)(flat)

    dense = Dense(units=32)(dropout1)
    dense = Activation('relu')(dense)
    dropout2 = Dropout(0.5)(dense)

    output = Dense(num_classes, activation='softmax')(dropout2)

    # Keras 2 names these arguments `inputs` / `outputs`; the singular
    # Keras 1 spellings used previously are deprecated or rejected.
    return Model(inputs=inputs, outputs=output)

In [None]:
# Build the network and print its layer overview.
model = ACNN_model()
model.summary()

# Adam with its default settings; an SGD alternative that was tried before:
#optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
optimizer = Adam()

# Categorical cross-entropy as the loss, accuracy as the reported metric.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Here we define the names of all stimuli, which lets us program an automated cross-validation routine, and we also list all data folders so that they can be loaded during training.

In [None]:
# Stimulus names; each one is used in turn as the held-out evaluation set.
data_names = [
    'beach',
    'breite_strasse',
    'bridge_1',
    'bridge_2',
    'bumblebee',
    'doves',
    'ducks_boat',
    'ducks_children',
    'golf',
    'holsten_gate',
    'koenigstrasse',
    'puppies',
    'roundabout',
    'sea',
    'st_petri_gate',
    'st_petri_market',
    'st_petri_mcdonalds',
    'street',
]

# Root directory that holds one sub-directory per data folder.
data_path = '/bigpool/strohmfn/gazecom_processed/train'

# Names (not full paths) of every immediate sub-directory of `data_path`.
subfolders = [entry.name for entry in os.scandir(data_path) if entry.is_dir()]

We now perform 18-fold cross-validation: the data of all but one stimulus is used for training and the remaining stimulus for testing. Because the data is too large for most systems, we load ten files at a time, train on them, load the next ten, and so on. The order in which the files are loaded and the data inside the files are shuffled each epoch. After each training phase (10 files) we evaluate the performance of the network on the held-out validation set and always save the model with the lowest validation loss. After n epochs of training we evaluate the performance of both the latest and the best-performing model and save a classification report for each to file.

In [None]:
# Training configuration used by the cross-validation loop.
export_name = 'ACNN'  # tag embedded in the names of saved weight/report files
epochs = 50           # number of passes over the training folders per fold
batch_size = 128      # mini-batch size handed to model.fit

In [None]:
def _load_recording(folder):
    """Return ``(samples, labels)`` read from ``<data_path>/<folder>/``.

    Loads ``<folder>.npy`` and ``<folder>_labels.npy``. The labels are used as
    one-hot rows by the training and evaluation code below.
    """
    prefix = data_path + "/" + folder + "/" + folder
    return np.load(prefix + ".npy"), np.load(prefix + "_labels.npy")


def _evaluate_and_report(net, weights_file, report_file, x, y_true, target_names):
    """Load ``weights_file`` into ``net`` and write its classification report.

    ``y_true`` holds class indices. ``labels`` is passed explicitly so that the
    report still works (with zero support) when a class does not occur in the
    evaluation fold; otherwise scikit-learn complains that the number of
    classes does not match ``target_names``.
    """
    net.load_weights(weights_file)
    y_pred = np.argmax(net.predict(x), axis=1)
    report = classification_report(
        y_true,
        y_pred,
        labels=list(range(len(target_names))),
        target_names=target_names,
        digits=5,
    )
    # Context manager guarantees the file is closed even if writing fails.
    with open(report_file, "w") as text_file:
        text_file.write(report)


files_per_chunk = 10  # number of training folders held in memory at once
target_names = ['Fixation', 'Saccade', 'Smooth Pursuit', 'Noise']

for val_set in data_names:
    # Start every fold from a freshly initialised network and optimizer.
    # Re-using one model across folds would validate on data that the network
    # had already been trained on in an earlier fold.
    model = ACNN_model()
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

    best_val_los = np.inf
    best_prefix = 'best_model_' + export_name + '_' + val_set
    latest_prefix = 'latest_model_' + export_name + '_' + val_set

    x_validate, y_validate = _load_recording(val_set)

    # Everything that does not belong to the held-out stimulus is training
    # data. This is a substring test, exactly as before -- presumably folder
    # names embed the stimulus name; verify against the data layout.
    train_folders = [name for name in subfolders if val_set not in name]
    if not train_folders:
        raise ValueError("No training folders found for evaluation set: " + val_set)

    print("Evaluation Set: " + val_set)

    for e in range(epochs):
        print("EPOCH: " + str(e+1))

        # Shuffle a per-fold list so the module-level `subfolders` stays intact.
        shuffle(train_folders)

        # One progress tick per loaded folder; closed automatically on errors.
        with tqdm(total=len(train_folders)) as pbar:
            for start in range(0, len(train_folders), files_per_chunk):
                x_parts = []
                y_parts = []
                for folder in train_folders[start:start + files_per_chunk]:
                    x, y = _load_recording(folder)
                    x_parts.append(x)
                    y_parts.append(y)
                    pbar.update(1)

                # Concatenate once per chunk instead of once per folder.
                x_train = np.concatenate(x_parts)
                y_train = np.concatenate(y_parts)

                model.fit(x_train, y_train,
                          batch_size=batch_size,
                          verbose=1,
                          shuffle=True)

                # score[0] is the loss, score[1] the accuracy metric.
                score = model.evaluate(x_validate, y_validate, verbose=0)
                print(score[0])
                print(score[1])
                if score[0] < best_val_los:
                    best_val_los = score[0]
                    model.save_weights(best_prefix + '.h5')

    model.save_weights(latest_prefix + '.h5')

    # evaluate performance of models
    Y_validate = np.argmax(y_validate, axis=1) # Convert one-hot to index

    _evaluate_and_report(model, latest_prefix + '.h5', latest_prefix + '.txt',
                         x_validate, Y_validate, target_names)

    if np.isfinite(best_val_los):
        _evaluate_and_report(model, best_prefix + '.h5', best_prefix + '.txt',
                             x_validate, Y_validate, target_names)
    else:
        # No checkpoint was written in this fold (e.g. the loss was NaN), so
        # do not report on a missing or stale weights file.
        print("No finite validation loss for " + val_set + "; skipping best-model report")