In [None]:
import pickle as pk

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping
from keras.layers import Dense, LSTM
from keras.models import Sequential
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import cohen_kappa_score, roc_auc_score
from sklearn.model_selection import KFold

In [1]:
def lrcv(k, X, y, random_state=None):
    """Cross-validate a logistic regression on every target column.

    For each column of ``y`` (one "affect"), the rows of ``X`` are split into
    ``k`` shuffled folds. A fresh ``LogisticRegression`` is fit on the training
    part of each fold and scored on the held-out part.

    Parameters
    ----------
    k : int
        Number of folds handed to ``KFold``.
    X : numpy.ndarray
        Feature matrix with one row per sample; indexed with the integer
        index arrays produced by ``KFold.split``.
    y : numpy.ndarray
        Label matrix with one row per sample; it is iterated column by column
        via ``y.T``. Labels are assumed to be binary (the AUC is computed from
        the probability of the second class) -- TODO confirm against caller.
    random_state : int or None, optional
        Seed forwarded to ``KFold`` so the shuffled folds can be reproduced.
        The default ``None`` keeps the original unseeded behaviour.

    Returns
    -------
    tuple of (float, float)
        Mean ROC AUC and mean Cohen's kappa over every (column, fold) pair.

    Notes
    -----
    ``KFold`` does not stratify, so a fold may end up with a single class in
    its training or test part; fitting or scoring such a fold is not
    meaningful and may fail inside scikit-learn.
    """
    skf = KFold(n_splits=k, shuffle=True, random_state=random_state)
    aucs = []
    kappas = []
    # Calculate metrics for each affect
    for y_i in y.T:
        print('processing an affect')
        # Train a logistic regression for each fold
        for train_index, test_index in skf.split(X, y_i):
            print('processing a fold')
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y_i[train_index], y_i[test_index]
            log_reg = LogisticRegression(max_iter=10000)
            log_reg.fit(X_train, y_train)
            # ROC AUC ranks samples by score, so feed it the probability of
            # the positive class instead of thresholded 0/1 predictions.
            y_score = log_reg.predict_proba(X_test)[:, 1]
            aucs.append(roc_auc_score(y_test, y_score))
            # Cohen's kappa compares hard labels.
            kappas.append(cohen_kappa_score(y_test, log_reg.predict(X_test)))
    return np.mean(aucs), np.mean(kappas)

In [None]:
# Load the pickled inputs and targets. Context managers make sure the file
# handles are closed as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('input_data.pkl', 'rb') as input_file:
    input_data = pk.load(input_file)
with open('target_data.pkl', 'rb') as target_file:
    target_data = pk.load(target_file)

# The integer label matrix is identical for every model, so build it once.
targets = np.array(target_data).astype(int)

# One entry per (layer count, hidden size) configuration; read by the plots.
layers = []
dimention = []
auc = []
kappa = []

for lay in range(1,4):
    for dim in np.power(2, np.arange(12)):
        print(lay, dim)
        aucs = []
        kappas = []
        # Average over several randomly initialised (never trained) LSTMs.
        for i in range(5):
            print('creating random lstm')
            # Make the LSTM
            rand_lstm = Sequential()
            rand_lstm.add(LSTM(dim, activation='sigmoid', return_sequences=True, input_shape=(1, input_data[0].shape[1])))
            for _ in range(lay-1):
                rand_lstm.add(LSTM(dim, activation='sigmoid', return_sequences=True))
            rand_lstm.compile(optimizer='adam', loss='mse')
            # Expose the output of every layer, not only the last one.
            outputs = [layer.output for layer in rand_lstm.layers]
            inter_model = keras.Model(inputs=rand_lstm.inputs, outputs=outputs)

            # Project each input into higher dimensions
            print('projecting input sequences')
            log_input = []
            for input_batch in input_data:
                # NOTE(review): every row becomes its own length-1 sequence
                # (rows, 1, features), so rows are processed independently of
                # each other -- confirm this is the intended projection.
                input_batch = input_batch.reshape(input_batch.shape[0], 1, input_batch.shape[1])
                batch_out = np.stack(inter_model.predict(input_batch))
                # Keep the activations for the last row only. With more than
                # one layer there is an extra leading axis (presumably one
                # entry per layer output -- TODO confirm), and all of it is
                # flattened into a single feature vector.
                batch_out = batch_out[-1,:,:].flatten() if lay == 1 else batch_out[:,-1,:,:].flatten()
                log_input.append(batch_out)
                # NOTE(review): the layers are not built with stateful=True,
                # so this is presumably a no-op -- TODO confirm.
                rand_lstm.reset_states()
            log_input = np.stack(log_input)

            # Get the average auc and kappa for all affects and folds
            mean_auc, mean_kappa = lrcv(5, log_input, targets)
            aucs.append(mean_auc)
            kappas.append(mean_kappa)
        layers.append(lay)
        # Width of the projected feature vector fed to the classifier.
        dimention.append(log_input.shape[1])
        auc.append(np.mean(aucs))
        kappa.append(np.mean(kappas))

In [None]:
# Number of consecutive result entries that make up one plotted curve.
x = 12

# One figure per metric; each figure overlays three curves, one per slice of
# x consecutive entries, labelled "1/2/3 Layer LSTM".
for metric_values, y_label in ((auc, 'ROC AUC'), (kappa, "Cohen's Kappa")):
    plt.figure()
    for curve_idx in range(3):
        start = curve_idx * x
        stop = start + x
        plt.plot(
            dimention[start:stop],
            metric_values[start:stop],
            marker='.',
            label=f'{curve_idx + 1} Layer LSTM',
        )
    plt.xlabel('Projected Dimensions')
    plt.ylabel(y_label)
    plt.title('Random LSTM Results')
    plt.legend()
    plt.show()