In [None]:
import numpy as np
import pandas as pd
import pickle as pk
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from sklearn.model_selection import KFold
from sklearn.metrics import cohen_kappa_score, roc_auc_score

In [None]:
def lrcv(k, X, y, random_state=None):
    """Cross-validate a single-layer sigmoid model and report mean AUC and kappa.

    For each of ``k`` shuffled folds, a fresh Keras ``Sequential`` model with
    one ``Dense`` sigmoid layer (one output unit per target column) is trained
    with binary cross-entropy on the training split and used to predict the
    held-out split. ROC AUC and Cohen's kappa are computed per target column
    ("affect"), averaged over columns within the fold, and finally averaged
    over folds.

    Parameters
    ----------
    k : int
        Number of cross-validation folds.
    X : array
        Input features; indexed along axis 0 by integer index arrays, and
        ``X.shape[1]`` is used as the model's input size.
    y : array
        Targets; indexed along axis 0 like ``X``. Must be 2-D, since
        ``y.shape[1]`` sets the number of output units and each column is
        scored separately.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``KFold`` to make the shuffled splits reproducible.
        The default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple of (float, float)
        ``(mean_auc, mean_kappa)`` averaged over target columns and folds.
    """
    kfold = KFold(n_splits=k, shuffle=True, random_state=random_state)
    fold_aucs = []
    fold_kappas = []

    # Train a logistic regression for each fold
    for train_index, test_index in kfold.split(X, y):
        print('processing a fold')
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # One sigmoid unit per target column, i.e. an independent logistic
        # regression per affect sharing the same inputs.
        log_reg = Sequential()
        log_reg.add(Dense(y_train.shape[1], activation='sigmoid', input_shape=(X_train.shape[1],)))
        log_reg.compile(optimizer='adam', loss='binary_crossentropy')

        # epochs is only an upper bound; early stopping on the training loss
        # is what is expected to end the fit.
        es = [EarlyStopping(monitor='loss', min_delta=0.00001)]
        log_reg.fit(X_train, y_train, epochs=10000, callbacks=es, verbose=False)
        y_pred = log_reg.predict(X_test)

        # Use the prediction to calculate metrics for each affect
        print('processing each affect')
        affect_aucs = []
        affect_kappas = []
        # Transpose so that each iteration yields one target column.
        for y_t, y_p in zip(np.array(y_test).T, np.array(y_pred).T):
            # NOTE(review): ROC AUC is undefined when y_t holds a single class;
            # depending on the scikit-learn version this may raise or yield
            # NaN. Confirm every fold contains both classes for every affect.
            affect_aucs.append(roc_auc_score(y_t, y_p))
            # Kappa needs hard labels, so the predicted scores are rounded.
            affect_kappas.append(cohen_kappa_score(y_t, np.around(y_p)))

        # Store the results of each affect
        fold_aucs.append(np.mean(affect_aucs))
        fold_kappas.append(np.mean(affect_kappas))
    return np.mean(fold_aucs), np.mean(fold_kappas)

In [None]:
# Load the pickled inputs and targets. Context managers make sure the file
# handles are closed again.
# NOTE: pickle can execute arbitrary code while loading; only load files
# from a trusted source.
with open('input_data.pkl', 'rb') as input_file:
    input_data = pk.load(input_file)
with open('target_data.pkl', 'rb') as target_file:
    target_data = pk.load(target_file)

# Seed NumPy's global RNG so the random embeddings (and any consumer of the
# global NumPy state) are repeatable across runs. This does not seed the
# Keras weight initialisation, so scores may still vary slightly.
SEED = 0
np.random.seed(SEED)

# Loop-invariant values, computed once instead of on every repetition.
targets = np.array(target_data).astype(int)
n_features = input_data[0].shape[1]

# Parallel result lists: one entry per (pooling, dimension) combination.
# The misspelled name `dimention` is kept because the plotting cell reads it.
pooling = []
dimention = []
auc = []
kappa = []

for pooling_name, pooling_function in zip(['Max Pooling', 'Mean Pooling'], [np.max, np.mean]):
    # Projected dimensions 1, 2, 4, ..., 2048.
    for dim in np.power(2, np.arange(12)):
        print(pooling_name, dim)
        aucs = []
        kappas = []
        # Repeat with fresh random embeddings and average the scores.
        for i in range(5):
            print('creating bag of random embeddings')
            # Uniform values in [-1, 1) scaled by sqrt(n_features).
            # NOTE(review): random-projection initialisations commonly scale
            # by 1/sqrt(n_features) instead; confirm the multiplication here
            # is intended.
            random_embedding = (np.random.rand(n_features, dim) * 2 - 1) * np.sqrt(n_features)

            print('projecting input sequences')
            log_input = []
            for input_batch in input_data:
                # Project the batch, then pool over axis 0 to obtain a single
                # vector of length `dim` per input.
                embedded_input = np.dot(input_batch, random_embedding)
                embedded_input = pooling_function(embedded_input, axis=0)
                log_input.append(embedded_input)
            log_input = np.stack(log_input)

            # Get the average auc and kappa for all affects and folds
            mean_auc, mean_kappa = lrcv(5, log_input, targets)
            aucs.append(mean_auc)
            kappas.append(mean_kappa)
        pooling.append(pooling_name)
        dimention.append(dim)
        auc.append(np.mean(aucs))
        kappa.append(np.mean(kappas))

In [None]:
# Plot each metric against the projected dimension, one line per pooling
# strategy. Rows are grouped by the recorded `pooling` label rather than by a
# hard-coded slice, so the figures stay correct if the sweep changes length.
series_names = []
for recorded_name in pooling:
    if recorded_name not in series_names:
        series_names.append(recorded_name)  # keep first-seen order

for scores, y_label in ((auc, 'ROC AUC'), (kappa, 'Cohen\'s Kappa')):
    plt.figure()
    for series_name in series_names:
        rows = [i for i, recorded_name in enumerate(pooling) if recorded_name == series_name]
        plt.plot(
            [dimention[i] for i in rows],
            [scores[i] for i in rows],
            marker='.',
            label=series_name,
        )
    plt.xlabel('Projected Dimensions')
    plt.ylabel(y_label)
    plt.title('Bag of Random Embeddings Results')
    plt.legend()
    plt.show()