In [1]:
import pickle as pk

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping
from keras.layers import Dense, LSTM
from keras.models import Model, Sequential
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import cohen_kappa_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold

Using TensorFlow backend.


In [2]:
# Logistic Regression Function

def lrcv(k, X, y, random_state=None):
    """Cross-validate a logistic regression independently for each target column.

    For every column of ``y`` a stratified, shuffled k-fold split of ``X`` is
    made; a fresh ``LogisticRegression`` is fitted on each training fold and
    scored on the held-out fold.

    Parameters
    ----------
    k : int
        Number of stratified folds.
    X : array-like indexable by integer index arrays
        Feature matrix, one row per sample.
    y : 2-D array
        Targets, one column per label (the function iterates over ``y.T``).
        Each column is treated as a binary classification target.
    random_state : int, RandomState instance or None, optional
        Seed forwarded to ``StratifiedKFold`` so the shuffled folds can be
        reproduced. ``None`` (the default) keeps the previous unseeded
        behaviour.

    Returns
    -------
    (float, float)
        Mean ROC AUC and mean Cohen's kappa over all columns and folds.
    """
    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=random_state)
    aucs = []
    kappas = []
    # Calculate metrics for each target column (affect)
    for y_i in y.T:
        # Train a logistic regression for each fold
        for train_index, test_index in skf.split(X, y_i):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y_i[train_index], y_i[test_index]
            log_reg = LogisticRegression(max_iter=10000, solver='lbfgs')
            log_reg.fit(X_train, y_train)
            # ROC AUC is a ranking metric: it needs a continuous score, not
            # thresholded labels. Column 1 is the probability of the larger
            # class label, which is the positive class roc_auc_score assumes.
            y_score = log_reg.predict_proba(X_test)[:, 1]
            # Cohen's kappa compares hard label assignments.
            y_pred = log_reg.predict(X_test)
            aucs.append(roc_auc_score(y_test, y_score))
            kappas.append(cohen_kappa_score(y_test, y_pred))
    return np.mean(aucs), np.mean(kappas)

In [None]:
# BOREP: project every sequence through a random linear map, pool over the
# first axis, and evaluate the pooled vectors with cross-validated logistic
# regression.

# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
# Context managers make sure the file handles are closed after loading.
with open('input_data.pkl', 'rb') as input_file:
    input_data = pk.load(input_file)
with open('target_data.pkl', 'rb') as target_file:
    target_data = pk.load(target_file)

# Loop-invariant values, computed once instead of on every repetition.
targets = np.array(target_data).astype(int)
n_features = input_data[0].shape[1]

# One entry per (pooling type, projected dimension) configuration.
pooling = []
dimention = []
auc = []
kappa = []

for pooling_name, pooling_function in zip(['Max Pooling', 'Mean Pooling'], [np.max, np.mean]):
    for dim in np.power(2, np.arange(12)):
        print(pooling_name, dim)
        aucs = []
        kappas = []
        # Repeat with 5 independent random projections and average the scores.
        for i in range(5):
            # Entries are uniform in [-sqrt(n_features), sqrt(n_features)).
            # NOTE(review): BOREP is usually described with a 1/sqrt(d) bound;
            # confirm that multiplying by sqrt(d) here is intentional.
            random_embedding = (np.random.rand(n_features, dim) * 2 - 1) * np.sqrt(n_features)
            log_input = np.stack([
                pooling_function(np.dot(input_batch, random_embedding), axis=0)
                for input_batch in input_data
            ])
            # Get the average auc and kappa for all affects and folds
            mean_auc, mean_kappa = lrcv(5, log_input, targets)
            aucs.append(mean_auc)
            kappas.append(mean_kappa)
        pooling.append(pooling_name)
        dimention.append(dim)
        auc.append(np.mean(aucs))
        kappa.append(np.mean(kappas))

Max Pooling 1
Max Pooling 2
Max Pooling 4
Max Pooling 8
Max Pooling 16
Max Pooling 32
Max Pooling 64




Max Pooling 128




In [None]:
# Plot each metric against the projected dimension, one line per pooling type.
# Points are grouped by the label recorded in `pooling` instead of fixed slice
# positions, so the figure stays correct if the sweep was interrupted or the
# number of dimension settings changes.
for metric_values, metric_label in ((auc, 'ROC AUC'), (kappa, 'Cohen\'s Kappa')):
    plt.figure()
    for pooling_label in ('Max Pooling', 'Mean Pooling'):
        rows = [idx for idx, name in enumerate(pooling) if name == pooling_label]
        plt.plot(
            [dimention[idx] for idx in rows],
            [metric_values[idx] for idx in rows],
            marker='.',
            label=pooling_label,
        )
    plt.xlabel('Projected Dimensions')
    plt.ylabel(metric_label)
    plt.title('Bag of Random Embeddings Results')
    plt.legend()
    plt.show()

In [None]:
# RLSTM: push every sequence through a randomly initialised (untrained) LSTM
# stack and evaluate the final layer's output with cross-validated logistic
# regression.

# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
# Context managers make sure the file handles are closed after loading.
with open('input_data.pkl', 'rb') as input_file:
    input_data = pk.load(input_file)
with open('target_data.pkl', 'rb') as target_file:
    target_data = pk.load(target_file)

# Loop-invariant values, computed once instead of on every repetition.
targets = np.array(target_data).astype(int)
n_features = input_data[0].shape[1]

# One entry per (layer count, hidden size) configuration.
layers = []
dimention = []
auc = []
kappa = []

for lay in range(1,4):
    for dim in np.power(2, np.arange(12)):
        print(lay, dim)
        aucs = []
        kappas = []
        # Repeat with 5 independently initialised networks and average.
        for i in range(5):
            # Make the LSTM
            rand_lstm = Sequential()
            rand_lstm.add(LSTM(dim, activation='sigmoid', return_sequences=True, input_shape=(1, n_features)))
            for _ in range(lay-1):
                rand_lstm.add(LSTM(dim, activation='sigmoid', return_sequences=True))
            rand_lstm.compile(optimizer='adam', loss='mse')
            # Project each input into higher dimensions
            log_input = []
            for input_batch in input_data:
                # Each row becomes its own sample with a single time step.
                # NOTE(review): with this layout and non-stateful layers the
                # LSTM never sees more than one step per sample, so the
                # feature taken below depends only on the last row. Confirm
                # whether a (1, T, F) layout was intended.
                input_batch = input_batch.reshape(input_batch.shape[0], 1, input_batch.shape[1])
                batch_out = rand_lstm.predict(input_batch)
                # Keep the output produced for the last row only.
                log_input.append(batch_out[-1,:,:].flatten())
                rand_lstm.reset_states()
            log_input = np.stack(log_input)
            # Get the average auc and kappa for all affects and folds
            mean_auc, mean_kappa = lrcv(5, log_input, targets)
            aucs.append(mean_auc)
            kappas.append(mean_kappa)
        layers.append(lay)
        dimention.append(log_input.shape[1])
        auc.append(np.mean(aucs))
        kappa.append(np.mean(kappas))

In [None]:
# Number of consecutive result entries that belong to one layer count.
x = 12

# Draw one figure per metric, with one line per LSTM depth.
for metric_values, metric_label in ((auc, 'ROC AUC'), (kappa, 'Cohen\'s Kappa')):
    plt.figure()
    for i in range(3):
        window = slice(i*x, i*x+x)
        plt.plot(dimention[window], metric_values[window], marker='.', label=f'{i+1} Layer LSTM')
    plt.xlabel('Projected Dimensions')
    plt.ylabel(metric_label)
    plt.title('Random LSTM Results')
    plt.legend()
    plt.show()

In [None]:
# RLSTM (all layers): same as the random LSTM experiment, but the feature
# vector concatenates the output of every LSTM layer instead of using only
# the last layer.

# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
# Context managers make sure the file handles are closed after loading.
with open('input_data.pkl', 'rb') as input_file:
    input_data = pk.load(input_file)
with open('target_data.pkl', 'rb') as target_file:
    target_data = pk.load(target_file)

# Loop-invariant values, computed once instead of on every repetition.
targets = np.array(target_data).astype(int)
n_features = input_data[0].shape[1]

# One entry per (layer count, hidden size) configuration.
layers = []
dimention = []
auc = []
kappa = []

for lay in range(1,4):
    for dim in np.power(2, np.arange(12)):
        print(lay, dim)
        aucs = []
        kappas = []
        # Repeat with 5 independently initialised networks and average.
        for i in range(5):
            # Make the LSTM
            rand_lstm = Sequential()
            rand_lstm.add(LSTM(dim, activation='sigmoid', return_sequences=True, input_shape=(1, n_features)))
            for _ in range(lay-1):
                rand_lstm.add(LSTM(dim, activation='sigmoid', return_sequences=True))
            rand_lstm.compile(optimizer='adam', loss='mse')
            # Secondary model exposing the output of every layer.
            # `Model` comes from keras.models; the bare name `keras` is never
            # imported in this notebook, so `keras.Model` raised NameError.
            outputs = [layer.output for layer in rand_lstm.layers]
            inter_model = Model(inputs=rand_lstm.inputs, outputs=outputs)
            # Project each input into higher dimensions
            log_input = []
            for input_batch in input_data:
                # Each row becomes its own sample with a single time step.
                input_batch = input_batch.reshape(input_batch.shape[0], 1, input_batch.shape[1])
                layer_outputs = inter_model.predict(input_batch)
                # A single-output model may hand back a bare array rather than
                # a list; normalise so every depth takes the same path.
                if not isinstance(layer_outputs, (list, tuple)):
                    layer_outputs = [layer_outputs]
                # Concatenate, layer by layer, the output for the last row.
                batch_out = np.concatenate([np.asarray(out)[-1].ravel() for out in layer_outputs])
                log_input.append(batch_out)
                rand_lstm.reset_states()
            log_input = np.stack(log_input)
            # Get the average auc and kappa for all affects and folds
            mean_auc, mean_kappa = lrcv(5, log_input, targets)
            aucs.append(mean_auc)
            kappas.append(mean_kappa)
        layers.append(lay)
        dimention.append(log_input.shape[1])
        auc.append(np.mean(aucs))
        kappa.append(np.mean(kappas))

In [None]:
# Number of consecutive result entries that belong to one layer count.
x = 12

# Draw one figure per metric, with one line per LSTM depth.
for metric_values, metric_label in ((auc, 'ROC AUC'), (kappa, 'Cohen\'s Kappa')):
    plt.figure()
    for i in range(3):
        window = slice(i*x, i*x+x)
        plt.plot(dimention[window], metric_values[window], marker='.', label=f'{i+1} Layer LSTM')
    plt.xlabel('Projected Dimensions')
    plt.ylabel(metric_label)
    plt.title('Random LSTM Results')
    plt.legend()
    plt.show()