# 4. Answering RQ4

**Performing k-fold cross-validation on all models (trained with DGMs + Digital Biomarkers vs. DGMs alone)**

---


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
import cv2
import random
import pickle

from scipy.ndimage import gaussian_filter

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix

from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking, Input, Concatenate, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import backend
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import plot_model

warnings.filterwarnings('ignore')
backend.clear_session()

In [None]:
# Root folder of the study data on the mounted Google Drive
CECS_698_PATH = '/content/drive/MyDrive/CECS 698 - Data Analysis/'

# Participant ids 4 through 26 (inclusive)
PARTICIPANTS = list(range(4, 27))

# Sub-folders of the study root used by the readers below
FPOG_SCANPATHS_PATH = os.path.join(CECS_698_PATH, 'FPOG Scanpaths')
MERGED_DATA = os.path.join(CECS_698_PATH, 'Merged Data')
GOOGLE_FORMS_SHEETS = os.path.join(CECS_698_PATH, 'Google Forms Sheets')

# Participant ids per study group
# (presumably 'E-H' = easy assessment first, 'H-E' = hard first -- confirm)
GROUP = {
    'E-H': [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 24],
    'H-E': [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26],
}

# Participant ids to exclude on specific assessments
EASY_ASSESSMENT_EXCLUDE = [11, 22]
HARD_ASSESSMENT_EXCLUDE = [15, 21]

# Size every scanpath image is resized to when it is read
IMG_SIZE = (70, 70)

# Number of folds used by the cross-validation loops
N_SPLITS = 3

# Time-series columns removed when biomarkers are excluded
DROP_COLUMNS = ['eda_scl_usiemens', 'pulse_rate_bpm', 'temperature_celsius']

# Gathering Data 📈
---

## Getting Scores

In [None]:
# Outcome table with one row per participant and assessment.
# NOTE: the id column of this CSV is named 'Participant ID ' (with a trailing
# space); the rest of the notebook references it under that exact name.
df_scores = pd.read_csv(os.path.join(CECS_698_PATH, 'Participant Scores.csv'))

## Combining Questionnaire Data

In [None]:
# ----------------------------------------------- NASA TLX -----------------------------------------------
df_NASA_TLX = pd.read_csv(os.path.join(GOOGLE_FORMS_SHEETS, 'Participants Sheet - NASA TLX.csv'))

# Replace the form's column headers with short names (positional)
df_NASA_TLX.columns = [
    'Timestamp',
    'Participant ID',
    'Mental Demand',    # 1
    'Physical Demand',  # 2
    'Temporal Demand',  # 3
    'Performance',      # 4
    'Effort',           # 5
    'Frustration',      # 6
    'Dry Run',          # 7
]

# Keep only rows whose dry-run marker is empty, then discard the bookkeeping columns
df_NASA_TLX = df_NASA_TLX[df_NASA_TLX['Dry Run'].isnull()].drop(['Dry Run', 'Timestamp'], axis=1)


# ----------------------------------------------- Pre-Study KSS -----------------------------------------------
df_pre_study_KSS = pd.read_csv(os.path.join(GOOGLE_FORMS_SHEETS, 'Participants Sheet - Pre-Study KSS.csv'))
df_pre_study_KSS = df_pre_study_KSS[df_pre_study_KSS['Dry Run?'].isnull()].drop(['Dry Run?', 'Timestamp'], axis=1)
df_pre_study_KSS.columns = [
    'Participant ID',
    'Hours Awake',
    'Pre-Sleepiness Scale',
]
# The answer text starts with the numeric rating; keep only that leading integer
df_pre_study_KSS['Pre-Sleepiness Scale'] = df_pre_study_KSS['Pre-Sleepiness Scale'].apply(
    lambda answer: int(answer.split()[0])
)


# ----------------------------------------------- Post-Study KSS -----------------------------------------------
df_post_study_KSS = pd.read_csv(os.path.join(GOOGLE_FORMS_SHEETS, 'Participants Sheet - Post-Study KSS.csv'))
df_post_study_KSS = df_post_study_KSS[df_post_study_KSS['Dry Run?'].isnull()].drop(['Dry Run?', 'Timestamp'], axis=1)
df_post_study_KSS.columns = [
    'Participant ID',
    'Post-Sleepiness Scale',
]
df_post_study_KSS['Post-Sleepiness Scale'] = df_post_study_KSS['Post-Sleepiness Scale'].apply(
    lambda answer: int(answer.split()[0])
)

# Join the three questionnaires per participant, then attach the score rows.
# The scores table names its id column 'Participant ID ' (trailing space), so the
# duplicate key column is dropped after the merge.
df_all_questionnaires = df_NASA_TLX.merge(df_pre_study_KSS, on='Participant ID', how='inner')
df_all_questionnaires = df_all_questionnaires.merge(df_post_study_KSS, on='Participant ID', how='inner')
df_all_questionnaires = df_all_questionnaires.merge(
    df_scores, left_on='Participant ID', right_on='Participant ID '
).drop(columns=['Participant ID '])

In [None]:
df_all_questionnaires.columns

Index(['Participant ID', 'Mental Demand', 'Physical Demand', 'Temporal Demand',
       'Performance', 'Effort', 'Frustration', 'Hours Awake',
       'Pre-Sleepiness Scale', 'Post-Sleepiness Scale', 'Score', 'Assessment',
       'Group', 'Elapsed Minutes', 'Successful/Unsuccessful',
       'Python Experience', 'Year of Study'],
      dtype='object')

## Get X, y data then `train_test_split`

In [None]:
def read_y_labels(path):
    """Build the binary target vector, one entry per included assessment.

    Iterates participants and the 'easy'/'hard' assessments in the same order
    as the X readers, skipping excluded assessments, and looks the outcome up
    in `df_scores`.

    Args:
        path: Not used; kept so the signature mirrors the other readers.

    Returns:
        np.ndarray of 1 ('Successful') and 0 (anything else).
    """
    labels = []
    for participant in PARTICIPANTS:
        for difficulty in ('easy', 'hard'):
            excluded_ids = EASY_ASSESSMENT_EXCLUDE if difficulty == 'easy' else HARD_ASSESSMENT_EXCLUDE
            if participant in excluded_ids:
                continue  # Excluded assessments (i.e. ones with data loss)

            row_mask = (df_scores['Participant ID '] == participant) & (df_scores['Assessment'] == difficulty)
            outcome = df_scores.loc[row_mask, 'Successful/Unsuccessful'].values[0]
            labels.append(int(outcome == 'Successful'))

    return np.array(labels)

def read_X_timeseries(path, exclude_biomarkers=False):
    """Read one merged time-series CSV per included assessment.

    Args:
        path: Folder containing one 'Participant <id>' sub-folder per participant.
        exclude_biomarkers: When True, the columns listed in DROP_COLUMNS are
            removed from every frame.

    Returns:
        List of DataFrames indexed by 'timestamp_unix', ordered by participant
        and then by assessment ('easy', 'hard').
    """
    frames = []
    for participant in PARTICIPANTS:
        for difficulty in ('easy', 'hard'):
            excluded_ids = EASY_ASSESSMENT_EXCLUDE if difficulty == 'easy' else HARD_ASSESSMENT_EXCLUDE
            if participant in excluded_ids:
                continue  # Excluded assessments (i.e. ones with data loss)

            csv_path = os.path.join(
                path,
                f"Participant {participant}",
                f"participant{participant}_{difficulty}_assessment.csv",
            )
            frame = pd.read_csv(csv_path).set_index('timestamp_unix')

            if exclude_biomarkers:
                frame = frame.drop(columns=DROP_COLUMNS)

            frames.append(frame)

    return frames

def read_X_categorical(path):
    """Collect the questionnaire features for every included assessment.

    Rows are taken from `df_all_questionnaires` in the same participant /
    assessment order used by the other readers, so row i lines up with
    element i of the time-series, image and label collections.

    Args:
        path: Not used; kept so the signature mirrors the other readers.

    Returns:
        DataFrame restricted to the feature columns listed below (original
        row index preserved). Empty, with those columns, when nothing matched.
    """
    features = ['Participant ID', 'Mental Demand', 'Physical Demand', 'Temporal Demand',
       'Performance', 'Effort', 'Frustration', 'Hours Awake',
       'Pre-Sleepiness Scale', 'Post-Sleepiness Scale', 'Assessment',
       'Group', 'Elapsed Minutes',
       'Python Experience', 'Year of Study']

    selected = []
    for p in PARTICIPANTS:
        for diff in ['easy', 'hard']:
            if (diff == 'easy' and p in EASY_ASSESSMENT_EXCLUDE) or (diff == 'hard' and p in HARD_ASSESSMENT_EXCLUDE):
                continue # Excluded assessments (i.e ones with data loss)

            row_mask = (df_all_questionnaires['Participant ID'] == p) & (df_all_questionnaires['Assessment'] == diff)
            selected.append(df_all_questionnaires.loc[row_mask, features])

    if not selected:
        return pd.DataFrame(columns=features)

    # Concatenate once: avoids re-copying the accumulated frame on every
    # iteration and avoids concatenating with an empty placeholder frame.
    return pd.concat(selected, axis=0)


# Helper function to read a single image
def read_image(img_path, img_size):
    """Load one image as a normalized grayscale array.

    Args:
        img_path: Path of the image file.
        img_size: Target size passed to `cv2.resize`.

    Returns:
        float32 array with values scaled to [0, 1].

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    img = cv2.resize(img, img_size) # Resizing all images to make them uniform
    img = img.astype("float32") / 255.0  # Normalize
    return img

def read_X_image(path, img_size=IMG_SIZE):
    """Read the FPOG scanpath image of every included assessment.

    Args:
        path: Folder containing one 'Participant <id>' sub-folder per participant.
        img_size: Size every image is resized to (defaults to IMG_SIZE).

    Returns:
        np.ndarray stacking the images in participant / assessment order.
    """
    images = []

    for participant in PARTICIPANTS:
        for difficulty in ('easy', 'hard'):
            excluded_ids = EASY_ASSESSMENT_EXCLUDE if difficulty == 'easy' else HARD_ASSESSMENT_EXCLUDE
            if participant in excluded_ids:
                continue  # Excluded assessments (i.e. ones with data loss)

            image_file = os.path.join(path, f"Participant {participant}", f"FPOG {difficulty}.png")
            images.append(read_image(image_file, img_size))

    return np.array(images)

Loading every modality and the labels (the train/test split itself is done per fold during cross-validation)

In [None]:
# Load all modalities plus the labels. Every reader walks PARTICIPANTS and the
# 'easy'/'hard' assessments in the same order with the same exclusions, so
# index i refers to the same assessment in each collection.
X_timeseries = read_X_timeseries(MERGED_DATA)
X_categorical = read_X_categorical(MERGED_DATA)
X_image = read_X_image(FPOG_SCANPATHS_PATH)
y = read_y_labels(MERGED_DATA)

# Sanity check: all four collections should have the same length
print(f"X_timeseries: {len(X_timeseries)}, X_categorical: {len(X_categorical)}, X_image: {len(X_image)}, y_actual: {len(y)}")

X_timeseries: 42, X_categorical: 42, X_image: 42, y_actual: 42


In [None]:
# Folder where the best checkpoint of each model is written
saved_models = os.path.join(CECS_698_PATH, 'Saved Models 3')
if os.path.exists(saved_models):
    print(f"{saved_models} already exists")
else:
    os.makedirs(saved_models)
    print(f"Created {saved_models}")

/content/drive/MyDrive/CECS 698 - Data Analysis/Saved Models 3 already exists


# Data Preprocessing 🏭
---

## Function to Preprocess timeseries data

In [None]:
def preprocess_timeseries(X_train, X_test):
    """Scale each sequence independently and pad all of them to one length.

    Every DataFrame gets its own MinMaxScaler fitted on its own values, so no
    statistics are shared between sequences (or between train and test).

    Args:
        X_train: List of DataFrames for the training fold.
        X_test: List of DataFrames for the test fold.

    Returns:
        Tuple (train, test) of float32 arrays padded at the end of each
        sequence. The test array uses the training array's sequence length.
    """
    # Per-sequence scaling: a fresh scaler is fitted on each frame's own data
    train_scaled = [MinMaxScaler().fit_transform(frame.values) for frame in X_train]
    test_scaled = [MinMaxScaler().fit_transform(frame.values) for frame in X_test]

    # Pad the training sequences to the longest one, then force the test
    # sequences to that same length so both arrays share a time dimension.
    train_padded = pad_sequences(train_scaled, dtype='float32', padding='post')
    train_length = train_padded.shape[1]
    test_padded = pad_sequences(test_scaled, maxlen=train_length, dtype='float32', padding='post')

    return train_padded, test_padded

## Function to Preprocess categorical data

In [None]:
def apply_ohe(X_train, X_test):
    """One-hot encode 'Year of Study' and 'Assessment'.

    The encoder is fitted on the training frame only and then applied to the
    test frame. Both input frames have their row index reset in place.

    Returns:
        Tuple (X_train, X_test) where the two original columns are replaced by
        their encoded counterparts.
    """
    ohe_columns = ['Year of Study', 'Assessment']

    # `drop=first` to prevent multicollinearity
    encoder = OneHotEncoder(sparse_output=False, drop='first', handle_unknown='ignore')
    train_matrix = encoder.fit_transform(X_train[ohe_columns])
    test_matrix = encoder.transform(X_test[ohe_columns])

    encoded_names = encoder.get_feature_names_out(ohe_columns)
    train_encoded_df = pd.DataFrame(train_matrix, columns=encoded_names)
    test_encoded_df = pd.DataFrame(test_matrix, columns=encoded_names)

    # The encoded frames use a fresh 0..n-1 index; align the inputs with it
    X_train.reset_index(drop=True, inplace=True)
    X_test.reset_index(drop=True, inplace=True)

    # Swap the raw categorical columns for the encoded ones
    X_train = pd.concat([X_train.drop(columns=ohe_columns), train_encoded_df], axis=1)
    X_test = pd.concat([X_test.drop(columns=ohe_columns), test_encoded_df], axis=1)

    return X_train, X_test

def apply_scaling(X_train, X_test):
    """Min-max scale the numerical questionnaire columns.

    The scaler is fitted on the training frame and reused for the test frame.
    Both frames are modified in place and also returned.
    """
    numerical_columns = [
        'Mental Demand', 'Physical Demand', 'Temporal Demand',
        'Performance', 'Effort', 'Frustration',
        'Hours Awake', 'Elapsed Minutes',
        'Pre-Sleepiness Scale', 'Post-Sleepiness Scale',
    ]

    scaler = MinMaxScaler()
    X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
    X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

    return X_train, X_test

def apply_ordinal(X_train, X_test, experience_order=None):
    """Ordinal-encode the 'Python Experience' column.

    Args:
        X_train: Training frame; modified in place.
        X_test: Test frame; modified in place.
        experience_order: Optional list of experience levels from lowest to
            highest. When omitted, the levels are taken from the training
            frame in order of first appearance, so the resulting codes are
            NOT guaranteed to reflect the real ordering of the levels and can
            differ between folds. Pass the explicit order for a meaningful,
            stable encoding.

    Returns:
        Tuple (X_train, X_test) with 'Python Experience' replaced by codes.
        Levels that are not part of the category list (for example a level
        that appears only in the test fold) are encoded as -1 instead of
        raising an error.
    """
    if experience_order is None:
        experience_order = list(X_train['Python Experience'].unique())

    encoder = OrdinalEncoder(
        categories=[list(experience_order)],
        # A level seen only in the test fold must not abort the fold
        handle_unknown='use_encoded_value',
        unknown_value=-1,
    )
    X_train[['Python Experience']] = encoder.fit_transform(X_train[['Python Experience']])
    X_test[['Python Experience']] = encoder.transform(X_test[['Python Experience']])

    return X_train, X_test

def preprocess_categorical(X_train, X_test):
    """Run the full questionnaire preprocessing pipeline for one fold.

    Steps: collapse 'Year of Study' into 'Undergrad'/'Grad', one-hot encode,
    drop the identifier columns, min-max scale, ordinal-encode experience.
    The 'Year of Study' column of the passed-in frames is overwritten in place.

    Returns:
        Tuple (X_train, X_test) of fully numeric frames.
    """
    # Reduce 'Year of Study' to a strict undergrad / graduate split
    def to_study_level(year):
        return 'Undergrad' if 'Undergrad' in year else 'Grad'

    X_train['Year of Study'] = X_train['Year of Study'].apply(to_study_level)
    X_test['Year of Study'] = X_test['Year of Study'].apply(to_study_level)

    # One hot encoding
    X_train, X_test = apply_ohe(X_train.copy(), X_test.copy())

    # Identifier-like columns are not used as model features
    irrelevant_columns = ['Group', 'Participant ID']
    X_train = X_train.drop(columns=irrelevant_columns).reset_index(drop=True)
    X_test = X_test.drop(columns=irrelevant_columns).reset_index(drop=True)

    # MinMaxScaling
    X_train, X_test = apply_scaling(X_train.copy(), X_test.copy())

    # Ordinal encoding
    X_train, X_test = apply_ordinal(X_train.copy(), X_test.copy())

    return X_train, X_test

## Function to Preprocess Image Data

Rearranging the shape of train and test datasets to have one more dimension at the end representing color channels:

(Number of Images x Height x Width x Color Channels)

In [None]:
def preprocess_image(X_train, X_test):
    """Convert both image collections to arrays with a trailing channel axis.

    Returns:
        Tuple (X_train, X_test) of arrays whose shape is the stacked input
        shape followed by one extra axis of length 1.
    """
    train_images = np.array(X_train)[..., np.newaxis]
    test_images = np.array(X_test)[..., np.newaxis]
    return train_images, test_images

# Model Training: Eye Tracking and Physiological Biomarkers 🫀👁️

---

## Single Modal: Timeseries

In [None]:
# Sanity check of the fold sizes. Uses the shared N_SPLITS constant so this
# preview matches the number of folds used by the training loops.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True) # Stratified K-Fold Cross Validation

for fold_no, (train_index, test_index) in enumerate(skf.split(X_timeseries, y)):
    train_labels = y[train_index]
    test_labels = y[test_index]

    # Prints the (train size, test size) tuple of this fold
    print(f"{len(train_labels), len(test_labels)}")

(28, 14)
(28, 14)
(28, 14)


In [None]:
def build_model_single_modal():
    """Build and compile the time-series-only LSTM classifier.

    The number of input features is read from the first frame of the global
    `X_timeseries` at call time; the sequence length is left unspecified.

    Returns:
        Compiled Keras Sequential model with a single sigmoid output.
    """
    feature_count = X_timeseries[0].shape[1]

    classifier = Sequential([
        LSTM(64, return_sequences=True, input_shape=(None, feature_count)),
        Dropout(0.2),
        LSTM(32, return_sequences=False),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),  # Output for binary classification
    ])

    classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return classifier

In [None]:
# K-fold cross validation of the single-modal (time series) model.
# NOTE(review): the splitter shuffles without a fixed random_state, so the
# folds (and the reported scores) change from run to run.
# NOTE(review): the test fold is also passed as validation data, and the
# checkpoint with the best val_accuracy is reloaded and evaluated on that same
# fold, so the reported accuracy is selected on the data it is measured on.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True) # Stratified K-Fold Cross Validation
# One checkpoint file shared by all folds; each fold writes to this same path
path = os.path.join(saved_models, 'single_modal.keras')

acc_per_fold = []
loss_per_fold = []
num_datapoints = len(X_timeseries)

for fold_no, (train_index, test_index) in enumerate(skf.split(X_timeseries, y)):
    # ---------------------------- Preprocess Time Series Data ----------------------------
    # X_timeseries is a plain list, so the fold subsets are gathered index by index
    train_time_series = []
    for i in train_index:
        train_time_series.append(X_timeseries[i])

    test_time_series = []
    for i in test_index:
        test_time_series.append(X_timeseries[i])

    train_time_series, test_time_series = preprocess_timeseries(train_time_series.copy(), test_time_series.copy())

    # ---------------------------- Get Labels ----------------------------
    train_labels = y[train_index]
    test_labels = y[test_index]


    # ---------------------------- Build and Train the Model ----------------------------
    # Callbacks: stop after 10 epochs without val_loss improvement; keep the
    # weights of the epoch with the highest val_accuracy
    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
    checkpoint = ModelCheckpoint(
        filepath=path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=0
    )

    # A fresh model is built for every fold
    model = build_model_single_modal()
    history = model.fit(
        train_time_series,
        train_labels,
        epochs=500,
        batch_size=8,
        validation_data=(test_time_series, test_labels),
        callbacks=[early_stop, checkpoint],
        verbose=0
    )

    # Evaluate the best checkpoint of this fold on the test fold
    model = load_model(path)
    scores = model.evaluate(test_time_series, test_labels, verbose=0)
    num_epochs_ran = len(history.history['loss'])
    print(f'Fold {fold_no + 1} - Test Loss: {scores[0]:.4f}, Test Accuracy: {scores[1]:.4f}, Epochs: {num_epochs_ran}')
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])

# Mean accuracy and loss over all folds
print('===============================================')
print('Average scores for all folds:')
print(f'Accuracy: {np.mean(acc_per_fold):.4f}, Loss: {np.mean(loss_per_fold):.4f}')

Fold 1 - Test Loss: 0.6941, Test Accuracy: 0.4286, Epochs: 11
Fold 2 - Test Loss: 0.8691, Test Accuracy: 0.5714, Epochs: 17
Fold 3 - Test Loss: 0.6932, Test Accuracy: 0.5000, Epochs: 12
Average scores for all folds:
Accuracy: 0.5000, Loss: 0.7521


## 2-Modal: Timeseries + Questionnaires

In [None]:
def build_model_two_modal_ts_q(num_questionnaire_features=13):
    """Build and compile the two-input model: time series + questionnaires.

    Args:
        num_questionnaire_features: Width of the questionnaire input. The
            default of 13 matches the frame produced by
            `preprocess_categorical` when both one-hot encoded columns have
            exactly two categories in the training fold; pass the actual
            column count (e.g. `train_categorical.shape[1]`) if that differs.

    Returns:
        Compiled Keras functional model taking
        [time_series, questionnaire] inputs and emitting one sigmoid output.
    """
    # Time Series Input (LSTM); feature count comes from the global X_timeseries
    num_timeseries_features = X_timeseries[0].shape[1]
    time_input = Input(shape=(None, num_timeseries_features), name="Time_Series_Input")
    ts = LSTM(64, return_sequences=True)(time_input)
    ts = Dropout(0.2)(ts)
    ts = LSTM(32, return_sequences=False)(ts)
    ts = Dropout(0.2)(ts)
    ts = Dense(16, activation='relu')(ts)

    # Questionnaire Input (Dense)
    questionnaire_input = Input(shape=(num_questionnaire_features,), name='Questionnaire_Input')
    q = Dense(64, activation='relu')(questionnaire_input)
    q = Dropout(0.2)(q)
    q = Dense(32, activation='relu')(q)
    q = Dropout(0.2)(q)
    q = Dense(16, activation='relu')(q)

    # Merge both branches and classify
    merged = Concatenate()([ts, q])
    output = Dense(1, activation='sigmoid', name="Output")(merged)

    # Define model
    model = Model(inputs=[time_input, questionnaire_input], outputs=output)

    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

In [None]:
# K-fold cross validation of the two-modal model (time series + questionnaires).
# NOTE(review): the splitter shuffles without a fixed random_state, so the
# folds (and the reported scores) change from run to run.
# NOTE(review): the test fold is also passed as validation data, and the
# checkpoint with the best val_accuracy is reloaded and evaluated on that same
# fold, so the reported accuracy is selected on the data it is measured on.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True) # Stratified K-Fold Cross Validation
# One checkpoint file shared by all folds; each fold writes to this same path
path = os.path.join(saved_models, 'two_modal.keras')

acc_per_fold = []
loss_per_fold = []
num_datapoints = len(X_timeseries)

for fold_no, (train_index, test_index) in enumerate(skf.split(X_timeseries, y)):
    # ---------------------------- Preprocess Time Series Data ----------------------------
    # X_timeseries is a plain list, so the fold subsets are gathered index by index
    train_time_series = []
    for i in train_index:
        train_time_series.append(X_timeseries[i])

    test_time_series = []
    for i in test_index:
        test_time_series.append(X_timeseries[i])

    train_time_series, test_time_series = preprocess_timeseries(train_time_series.copy(), test_time_series.copy())

    # ---------------------------- Preprocess Categorical Data ----------------------------
    # Positional selection keeps the rows aligned with the time series and labels
    train_categorical = X_categorical.iloc[train_index]
    test_categorical = X_categorical.iloc[test_index]

    train_categorical, test_categorical = preprocess_categorical(train_categorical.copy(), test_categorical.copy())

    # ---------------------------- Get Labels ----------------------------
    train_labels = y[train_index]
    test_labels = y[test_index]

    # ---------------------------- Build and Train the Model ----------------------------
    # Callbacks: stop after 10 epochs without val_loss improvement; keep the
    # weights of the epoch with the highest val_accuracy
    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
    checkpoint = ModelCheckpoint(
        filepath=path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=0
    )

    # A fresh model is built for every fold
    model = build_model_two_modal_ts_q()
    # Train the Model
    history = model.fit(
        [train_time_series, train_categorical], train_labels,
        epochs=500,
        batch_size=8,
        validation_data=([test_time_series, test_categorical], test_labels),
        callbacks=[early_stop, checkpoint],
        verbose=0
    )

    # Evaluate the best checkpoint of this fold on the test fold
    model = load_model(path)
    scores =  model.evaluate([test_time_series, test_categorical], test_labels, verbose=0)
    num_epochs_ran = len(history.history['loss'])
    print(f'Fold {fold_no + 1} - Test Loss: {scores[0]:.4f}, Test Accuracy: {scores[1]:.4f}, Epochs: {num_epochs_ran}')
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])

# Mean accuracy and loss over all folds
print('===============================================')
print('Average scores for all folds:')
print(f'Accuracy: {np.mean(acc_per_fold):.4f}, Loss: {np.mean(loss_per_fold):.4f}')

Fold 1 - Test Loss: 0.6601, Test Accuracy: 0.5714, Epochs: 18
Fold 2 - Test Loss: 0.6777, Test Accuracy: 0.7143, Epochs: 28
Fold 3 - Test Loss: 0.6668, Test Accuracy: 0.7143, Epochs: 50
Average scores for all folds:
Accuracy: 0.6667, Loss: 0.6682


## 2-Modal: Timeseries + Images

In [None]:
def build_model_two_modal_ts_img():
    """Build and compile the two-input model: time series + scanpath image.

    The time-series feature count is read from the global `X_timeseries` and
    the image input shape from `IMG_SIZE` (single channel).

    Returns:
        Compiled Keras functional model taking [time_series, image] inputs
        and emitting one sigmoid output.
    """
    # Time-series branch (stacked LSTMs)
    feature_count = X_timeseries[0].shape[1]
    time_input = Input(shape=(None, feature_count), name="Time_Series_Input")
    ts_branch = LSTM(64, return_sequences=True)(time_input)
    ts_branch = Dropout(0.2)(ts_branch)
    ts_branch = LSTM(32, return_sequences=False)(ts_branch)
    ts_branch = Dropout(0.2)(ts_branch)
    ts_branch = Dense(16, activation='relu')(ts_branch)

    # Image branch (three conv + max-pool stages, then a dense layer)
    image_input = Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 1), name="Image_Input")
    img_branch = image_input
    for n_filters in (64, 32, 16):
        img_branch = Conv2D(n_filters, (3, 3), activation='relu')(img_branch)
        img_branch = MaxPooling2D((2, 2))(img_branch)
    img_branch = Flatten()(img_branch)
    img_branch = Dense(64, activation='relu')(img_branch)

    # Fuse both branches and classify
    fused = Concatenate()([ts_branch, img_branch])
    prediction = Dense(1, activation='sigmoid', name="Output")(fused)

    classifier = Model(inputs=[time_input, image_input], outputs=prediction)
    classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return classifier

In [None]:
# K-fold cross validation of the two-modal model (time series + images).
# NOTE(review): the splitter shuffles without a fixed random_state, so the
# folds (and the reported scores) change from run to run.
# NOTE(review): the test fold is also passed as validation data, and the
# checkpoint with the best val_accuracy is reloaded and evaluated on that same
# fold, so the reported accuracy is selected on the data it is measured on.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True) # Stratified K-Fold Cross Validation
# One checkpoint file shared by all folds; each fold writes to this same path
path = os.path.join(saved_models, 'two_modal2.keras')

acc_per_fold = []
loss_per_fold = []
num_datapoints = len(X_timeseries)

for fold_no, (train_index, test_index) in enumerate(skf.split(X_timeseries, y)):
    # ---------------------------- Preprocess Time Series Data ----------------------------
    # X_timeseries is a plain list, so the fold subsets are gathered index by index
    train_time_series = []
    for i in train_index:
        train_time_series.append(X_timeseries[i])

    test_time_series = []
    for i in test_index:
        test_time_series.append(X_timeseries[i])

    train_time_series, test_time_series = preprocess_timeseries(train_time_series.copy(), test_time_series.copy())

    # ---------------------------- Preprocess Image Data ----------------------------
    # Same indices as above, so images stay aligned with the time series and labels
    train_image = []
    for i in train_index:
        train_image.append(X_image[i])

    test_image = []
    for i in test_index:
        test_image.append(X_image[i])

    train_image, test_image = preprocess_image(train_image.copy(), test_image.copy())

    # ---------------------------- Get Labels ----------------------------
    train_labels = y[train_index]
    test_labels = y[test_index]

    # ---------------------------- Build and Train the Model ----------------------------
    # Callbacks: stop after 10 epochs without val_loss improvement; keep the
    # weights of the epoch with the highest val_accuracy
    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
    checkpoint = ModelCheckpoint(
        filepath=path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=0
    )

    # A fresh model is built for every fold
    model = build_model_two_modal_ts_img()
    # Train the Model
    history = model.fit(
        [train_time_series, train_image], train_labels,
        epochs=500,
        batch_size=8,
        validation_data=([test_time_series, test_image], test_labels),
        callbacks=[early_stop, checkpoint],
        verbose=0
    )

    # Evaluate the best checkpoint of this fold on the test fold
    model = load_model(path)
    scores =  model.evaluate([test_time_series, test_image], test_labels, verbose=0)
    num_epochs_ran = len(history.history['loss'])
    print(f'Fold {fold_no + 1} - Test Loss: {scores[0]:.4f}, Test Accuracy: {scores[1]:.4f}, Epochs: {num_epochs_ran}')
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])

# Mean accuracy and loss over all folds
print('===============================================')
print('Average scores for all folds:')
print(f'Accuracy: {np.mean(acc_per_fold):.4f}, Loss: {np.mean(loss_per_fold):.4f}')

Fold 1 - Test Loss: 0.5595, Test Accuracy: 0.7857, Epochs: 17
Fold 2 - Test Loss: 0.7086, Test Accuracy: 0.6429, Epochs: 11
Fold 3 - Test Loss: 0.7167, Test Accuracy: 0.5714, Epochs: 11
Average scores for all folds:
Accuracy: 0.6667, Loss: 0.6616


## 3-Modal: Timeseries + Questionnaires + Images

In [None]:
def build_model_three_modal(num_questionnaire_features=13):
    """Build and compile the three-input model: time series + questionnaires + image.

    Args:
        num_questionnaire_features: Width of the questionnaire input. The
            default of 13 matches the frame produced by
            `preprocess_categorical` when both one-hot encoded columns have
            exactly two categories in the training fold; pass the actual
            column count (e.g. `train_categorical.shape[1]`) if that differs.

    Returns:
        Compiled Keras functional model taking
        [time_series, questionnaire, image] inputs and emitting one sigmoid
        output.
    """
    # Time Series Input (LSTM); feature count comes from the global X_timeseries
    num_timeseries_features = X_timeseries[0].shape[1]
    time_input = Input(shape=(None, num_timeseries_features), name="Time_Series_Input")
    ts = LSTM(64, return_sequences=True)(time_input)
    ts = Dropout(0.2)(ts)
    ts = LSTM(32, return_sequences=False)(ts)
    ts = Dropout(0.2)(ts)
    ts = Dense(16, activation='relu')(ts)

    # Questionnaire Input (Dense)
    questionnaire_input = Input(shape=(num_questionnaire_features,), name='Questionnaire_Input')
    q = Dense(64, activation='relu')(questionnaire_input)
    q = Dropout(0.2)(q)
    q = Dense(32, activation='relu')(q)
    q = Dropout(0.2)(q)
    q = Dense(16, activation='relu')(q)

    # Image Input (CNN); single-channel images of size IMG_SIZE
    input_shape = (IMG_SIZE[0], IMG_SIZE[1], 1)
    image_input = Input(shape=input_shape, name="Image_Input")
    img = Conv2D(64, (3, 3), activation='relu')(image_input)
    img = MaxPooling2D((2, 2))(img)
    img = Conv2D(32, (3, 3), activation='relu')(img)
    img = MaxPooling2D((2, 2))(img)
    img = Conv2D(16, (3, 3), activation='relu')(img)
    img = MaxPooling2D((2, 2))(img)
    img = Flatten()(img)
    img = Dense(64, activation='relu')(img)

    # Merge all three branches and classify
    merged = Concatenate()([ts, q, img])
    output = Dense(1, activation='sigmoid', name="Output")(merged)

    # Define model
    model = Model(inputs=[time_input, questionnaire_input, image_input], outputs=output)

    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

In [None]:
# K-fold cross validation of the three-modal model
# (time series + questionnaires + images).
# NOTE(review): the splitter shuffles without a fixed random_state, so the
# folds (and the reported scores) change from run to run.
# NOTE(review): the test fold is also passed as validation data, and the
# checkpoint with the best val_accuracy is reloaded and evaluated on that same
# fold, so the reported accuracy is selected on the data it is measured on.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True) # Stratified K-Fold Cross Validation
# One checkpoint file shared by all folds; each fold writes to this same path
path = os.path.join(saved_models, 'three_modal.keras')

acc_per_fold = []
loss_per_fold = []
num_datapoints = len(X_timeseries)

for fold_no, (train_index, test_index) in enumerate(skf.split(X_timeseries, y)):
    # ---------------------------- Preprocess Time Series Data ----------------------------
    # X_timeseries is a plain list, so the fold subsets are gathered index by index
    train_time_series = []
    for i in train_index:
        train_time_series.append(X_timeseries[i])

    test_time_series = []
    for i in test_index:
        test_time_series.append(X_timeseries[i])

    train_time_series, test_time_series = preprocess_timeseries(train_time_series.copy(), test_time_series.copy())

    # ---------------------------- Preprocess Categorical Data ----------------------------
    # Positional selection keeps the rows aligned with the other modalities
    train_categorical = X_categorical.iloc[train_index]
    test_categorical = X_categorical.iloc[test_index]

    train_categorical, test_categorical = preprocess_categorical(train_categorical.copy(), test_categorical.copy())

    # ---------------------------- Preprocess Image Data ----------------------------
    train_image = []
    for i in train_index:
        train_image.append(X_image[i])

    test_image = []
    for i in test_index:
        test_image.append(X_image[i])

    train_image, test_image = preprocess_image(train_image.copy(), test_image.copy())

    # ---------------------------- Get Labels ----------------------------
    train_labels = y[train_index]
    test_labels = y[test_index]

    # ---------------------------- Build and Train the Model ----------------------------
    # Callbacks: stop after 10 epochs without val_loss improvement; keep the
    # weights of the epoch with the highest val_accuracy
    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
    checkpoint = ModelCheckpoint(
        filepath=path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=0
    )

    # A fresh model is built for every fold
    model = build_model_three_modal()
    # Train the Model
    history = model.fit(
        [train_time_series, train_categorical, train_image], train_labels,
        epochs=500,
        batch_size=8,
        validation_data=([test_time_series, test_categorical, test_image], test_labels),
        callbacks=[early_stop, checkpoint],
        verbose=0
    )

    # Evaluate the best checkpoint of this fold on the test fold
    model = load_model(path)
    scores =  model.evaluate([test_time_series, test_categorical, test_image], test_labels, verbose=0)
    num_epochs_ran = len(history.history['loss'])
    print(f'Fold {fold_no + 1} - Test Loss: {scores[0]:.4f}, Test Accuracy: {scores[1]:.4f}, Epochs: {num_epochs_ran}')
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])

# Mean accuracy and loss over all folds
print('===============================================')
print('Average scores for all folds:')
print(f'Accuracy: {np.mean(acc_per_fold):.4f}, Loss: {np.mean(loss_per_fold):.4f}')

Fold 1 - Test Loss: 0.6820, Test Accuracy: 0.5714, Epochs: 11
Fold 2 - Test Loss: 0.5345, Test Accuracy: 0.7857, Epochs: 18
Fold 3 - Test Loss: 0.6047, Test Accuracy: 0.7143, Epochs: 15
Average scores for all folds:
Accuracy: 0.6905, Loss: 0.6071


# Model Training: Exclusively Eye Tracking 👁️


In [None]:
backend.clear_session()

In [None]:
# Reload the data for the eye-tracking-only experiments. The only difference
# from the earlier load is exclude_biomarkers=True, which removes the
# DROP_COLUMNS columns from every time-series frame. The globals are
# overwritten, so the model builders below see the reduced feature count.
X_timeseries = read_X_timeseries(MERGED_DATA, exclude_biomarkers=True)
X_categorical = read_X_categorical(MERGED_DATA)
X_image = read_X_image(FPOG_SCANPATHS_PATH)
y = read_y_labels(MERGED_DATA)

# Sanity check: all four collections should have the same length
print(f"X_timeseries: {len(X_timeseries)}, X_categorical: {len(X_categorical)}, X_image: {len(X_image)}, y_actual: {len(y)}")

X_timeseries: 42, X_categorical: 42, X_image: 42, y_actual: 42


## Single Modal: Timeseries

In [None]:
# K-fold cross validation of the single-modal (time series) model on the
# data loaded without the biomarker columns.
# NOTE(review): the splitter shuffles without a fixed random_state, so the
# folds differ from run to run and from the folds used in the with-biomarkers
# experiments.
# NOTE(review): the test fold is also passed as validation data, and the
# checkpoint with the best val_accuracy is reloaded and evaluated on that same
# fold, so the reported accuracy is selected on the data it is measured on.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True) # Stratified K-Fold Cross Validation
# One checkpoint file shared by all folds; each fold writes to this same path
path = os.path.join(saved_models, 'single_modal_ex_biomarkers.keras')

acc_per_fold = []
loss_per_fold = []
num_datapoints = len(X_timeseries)

for fold_no, (train_index, test_index) in enumerate(skf.split(X_timeseries, y)):
    # ---------------------------- Preprocess Time Series Data ----------------------------
    # X_timeseries is a plain list, so the fold subsets are gathered index by index
    train_time_series = []
    for i in train_index:
        train_time_series.append(X_timeseries[i])

    test_time_series = []
    for i in test_index:
        test_time_series.append(X_timeseries[i])

    train_time_series, test_time_series = preprocess_timeseries(train_time_series.copy(), test_time_series.copy())

    # ---------------------------- Get Labels ----------------------------
    train_labels = y[train_index]
    test_labels = y[test_index]


    # ---------------------------- Build and Train the Model ----------------------------
    # Callbacks: stop after 10 epochs without val_loss improvement; keep the
    # weights of the epoch with the highest val_accuracy
    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
    checkpoint = ModelCheckpoint(
        filepath=path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=0
    )

    # A fresh model is built for every fold
    model = build_model_single_modal()
    history = model.fit(
        train_time_series,
        train_labels,
        epochs=500,
        batch_size=8,
        validation_data=(test_time_series, test_labels),
        callbacks=[early_stop, checkpoint],
        verbose=0
    )

    # Evaluate the best checkpoint of this fold on the test fold
    model = load_model(path)
    scores = model.evaluate(test_time_series, test_labels, verbose=0)
    num_epochs_ran = len(history.history['loss'])
    # The value in parentheses is the number of training samples in this fold
    print(f'Fold {fold_no + 1} ({len(train_labels)}) - Test Loss: {scores[0]:.4f}, Test Accuracy: {scores[1]:.4f}, Epochs: {num_epochs_ran}')
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])

# Mean accuracy and loss over all folds
print('===============================================')
print('Average scores for all folds:')
print(f'Accuracy: {np.mean(acc_per_fold):.4f}, Loss: {np.mean(loss_per_fold):.4f}')

Fold 1 (28) - Test Loss: 0.6927, Test Accuracy: 0.5714, Epochs: 11
Fold 2 (28) - Test Loss: 0.4786, Test Accuracy: 0.8571, Epochs: 41
Fold 3 (28) - Test Loss: 0.5924, Test Accuracy: 0.6429, Epochs: 41
Average scores for all folds:
Accuracy: 0.6905, Loss: 0.5879


## 2-Modal: Timeseries + Questionnaires

In [None]:
# Stratified K-Fold cross validation of the 2-modal model (time series + questionnaires).
#
# random_state is fixed so that shuffle=True yields the same fold membership on
# every run; without it each execution draws a new partition and the per-fold
# scores printed below cannot be reproduced.
# NOTE(review): reuse the same seed in the other CV cells if the models are meant
# to be compared on identical folds.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Checkpoint file for the best epoch; the same path is reused by every fold.
path = os.path.join(saved_models, 'two_modal_ex_biomarkers.keras')

acc_per_fold = []
loss_per_fold = []
num_datapoints = len(X_timeseries)

for fold_no, (train_index, test_index) in enumerate(skf.split(X_timeseries, y)):
    # ---------------------------- Preprocess Time Series Data ----------------------------
    # The train and test subsets are handed to the preprocessing helper separately.
    train_time_series, test_time_series = preprocess_timeseries(
        [X_timeseries[i] for i in train_index],
        [X_timeseries[i] for i in test_index],
    )

    # ---------------------------- Preprocess Categorical Data ----------------------------
    # .copy() so the helper works on independent frames rather than slices of X_categorical.
    train_categorical, test_categorical = preprocess_categorical(
        X_categorical.iloc[train_index].copy(),
        X_categorical.iloc[test_index].copy(),
    )

    # ---------------------------- Get Labels ----------------------------
    train_labels = y[train_index]
    test_labels = y[test_index]

    # ---------------------------- Build and Train the Model ----------------------------
    # Callbacks: stop once val_loss has not improved for 10 epochs, and keep on
    # disk only the epoch with the highest val_accuracy.
    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
    checkpoint = ModelCheckpoint(
        filepath=path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=0
    )

    # A fresh model is built for every fold so no weights carry over between folds.
    model = build_model_two_modal_ts_q()

    # NOTE(review): validation_data is the held-out fold itself, and the model
    # reloaded below is the epoch that scored best on that same fold, so the
    # reported accuracy is selected on the data it is measured on and is
    # optimistic. Consider carving a validation split out of the training fold.
    history = model.fit(
        [train_time_series, train_categorical], train_labels,
        epochs=500,
        batch_size=8,
        validation_data=([test_time_series, test_categorical], test_labels),
        callbacks=[early_stop, checkpoint],
        verbose=0
    )

    # Evaluate the best checkpoint (not the last epoch) on the test fold.
    model = load_model(path)
    scores = model.evaluate([test_time_series, test_categorical], test_labels, verbose=0)
    num_epochs_ran = len(history.history['loss'])
    print(f'Fold {fold_no + 1} - Test Loss: {scores[0]:.4f}, Test Accuracy: {scores[1]:.4f}, Epochs: {num_epochs_ran}')
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])

print('===============================================')
print('Average scores for all folds:')
print(f'Accuracy: {np.mean(acc_per_fold):.4f}, Loss: {np.mean(loss_per_fold):.4f}')

Fold 1 - Test Loss: 0.6842, Test Accuracy: 0.7143, Epochs: 19
Fold 2 - Test Loss: 0.6294, Test Accuracy: 0.7143, Epochs: 27
Fold 3 - Test Loss: 0.6865, Test Accuracy: 0.6429, Epochs: 16
Average scores for all folds:
Accuracy: 0.6905, Loss: 0.6667


## 2-Modal: Timeseries + Images

In [None]:
# Stratified K-Fold cross validation of the 2-modal model (time series + images).
#
# random_state is fixed so that shuffle=True yields the same fold membership on
# every run; without it each execution draws a new partition and the per-fold
# scores printed below cannot be reproduced.
# NOTE(review): reuse the same seed in the other CV cells if the models are meant
# to be compared on identical folds.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Checkpoint file for the best epoch; the same path is reused by every fold.
path = os.path.join(saved_models, 'two_modal2_ex_biomarkers.keras')

acc_per_fold = []
loss_per_fold = []
num_datapoints = len(X_timeseries)

for fold_no, (train_index, test_index) in enumerate(skf.split(X_timeseries, y)):
    # ---------------------------- Preprocess Time Series Data ----------------------------
    # The train and test subsets are handed to the preprocessing helper separately.
    train_time_series, test_time_series = preprocess_timeseries(
        [X_timeseries[i] for i in train_index],
        [X_timeseries[i] for i in test_index],
    )

    # ---------------------------- Preprocess Image Data ----------------------------
    train_image, test_image = preprocess_image(
        [X_image[i] for i in train_index],
        [X_image[i] for i in test_index],
    )

    # ---------------------------- Get Labels ----------------------------
    train_labels = y[train_index]
    test_labels = y[test_index]

    # ---------------------------- Build and Train the Model ----------------------------
    # Callbacks: stop once val_loss has not improved for 10 epochs, and keep on
    # disk only the epoch with the highest val_accuracy.
    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
    checkpoint = ModelCheckpoint(
        filepath=path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=0
    )

    # A fresh model is built for every fold so no weights carry over between folds.
    model = build_model_two_modal_ts_img()

    # NOTE(review): validation_data is the held-out fold itself, and the model
    # reloaded below is the epoch that scored best on that same fold, so the
    # reported accuracy is selected on the data it is measured on and is
    # optimistic. Consider carving a validation split out of the training fold.
    history = model.fit(
        [train_time_series, train_image], train_labels,
        epochs=500,
        batch_size=8,
        validation_data=([test_time_series, test_image], test_labels),
        callbacks=[early_stop, checkpoint],
        verbose=0
    )

    # Evaluate the best checkpoint (not the last epoch) on the test fold.
    model = load_model(path)
    scores = model.evaluate([test_time_series, test_image], test_labels, verbose=0)
    num_epochs_ran = len(history.history['loss'])
    print(f'Fold {fold_no + 1} - Test Loss: {scores[0]:.4f}, Test Accuracy: {scores[1]:.4f}, Epochs: {num_epochs_ran}')
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])

print('===============================================')
print('Average scores for all folds:')
print(f'Accuracy: {np.mean(acc_per_fold):.4f}, Loss: {np.mean(loss_per_fold):.4f}')

Fold 1 - Test Loss: 0.5654, Test Accuracy: 0.7857, Epochs: 19
Fold 2 - Test Loss: 0.6416, Test Accuracy: 0.7857, Epochs: 15
Fold 3 - Test Loss: 0.5411, Test Accuracy: 0.8571, Epochs: 16
Average scores for all folds:
Accuracy: 0.8095, Loss: 0.5827


## 3-Modal: Timeseries + Questionnaires + Images

In [None]:
# Stratified K-Fold cross validation of the 3-modal model
# (time series + questionnaires + images).
#
# random_state is fixed so that shuffle=True yields the same fold membership on
# every run; without it each execution draws a new partition and the per-fold
# scores printed below cannot be reproduced.
# NOTE(review): reuse the same seed in the other CV cells if the models are meant
# to be compared on identical folds.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Checkpoint file for the best epoch; the same path is reused by every fold.
path = os.path.join(saved_models, 'three_modal_ex_biomarkers.keras')

acc_per_fold = []
loss_per_fold = []
num_datapoints = len(X_timeseries)

for fold_no, (train_index, test_index) in enumerate(skf.split(X_timeseries, y)):
    # ---------------------------- Preprocess Time Series Data ----------------------------
    # The train and test subsets are handed to the preprocessing helper separately.
    train_time_series, test_time_series = preprocess_timeseries(
        [X_timeseries[i] for i in train_index],
        [X_timeseries[i] for i in test_index],
    )

    # ---------------------------- Preprocess Categorical Data ----------------------------
    # .copy() so the helper works on independent frames rather than slices of X_categorical.
    train_categorical, test_categorical = preprocess_categorical(
        X_categorical.iloc[train_index].copy(),
        X_categorical.iloc[test_index].copy(),
    )

    # ---------------------------- Preprocess Image Data ----------------------------
    train_image, test_image = preprocess_image(
        [X_image[i] for i in train_index],
        [X_image[i] for i in test_index],
    )

    # ---------------------------- Get Labels ----------------------------
    train_labels = y[train_index]
    test_labels = y[test_index]

    # ---------------------------- Build and Train the Model ----------------------------
    # Callbacks: stop once val_loss has not improved for 10 epochs, and keep on
    # disk only the epoch with the highest val_accuracy.
    early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
    checkpoint = ModelCheckpoint(
        filepath=path,
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=0
    )

    # A fresh model is built for every fold so no weights carry over between folds.
    model = build_model_three_modal()

    # NOTE(review): validation_data is the held-out fold itself, and the model
    # reloaded below is the epoch that scored best on that same fold, so the
    # reported accuracy is selected on the data it is measured on and is
    # optimistic. Consider carving a validation split out of the training fold.
    history = model.fit(
        [train_time_series, train_categorical, train_image], train_labels,
        epochs=500,
        batch_size=8,
        validation_data=([test_time_series, test_categorical, test_image], test_labels),
        callbacks=[early_stop, checkpoint],
        verbose=0
    )

    # Evaluate the best checkpoint (not the last epoch) on the test fold.
    model = load_model(path)
    scores = model.evaluate([test_time_series, test_categorical, test_image], test_labels, verbose=0)
    num_epochs_ran = len(history.history['loss'])
    print(f'Fold {fold_no + 1} - Test Loss: {scores[0]:.4f}, Test Accuracy: {scores[1]:.4f}, Epochs: {num_epochs_ran}')
    acc_per_fold.append(scores[1])
    loss_per_fold.append(scores[0])

print('===============================================')
print('Average scores for all folds:')
print(f'Accuracy: {np.mean(acc_per_fold):.4f}, Loss: {np.mean(loss_per_fold):.4f}')

Fold 1 - Test Loss: 0.6731, Test Accuracy: 0.6429, Epochs: 12
Fold 2 - Test Loss: 1.1288, Test Accuracy: 0.8571, Epochs: 16
Fold 3 - Test Loss: 0.6965, Test Accuracy: 0.5714, Epochs: 13
Average scores for all folds:
Accuracy: 0.6905, Loss: 0.8328
