In [8]:
import scipy.io
import pandas as pd
import numpy as np 
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.covariance import ledoit_wolf
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, roc_auc_score
import scipy.stats as stats
from mat_preproc import preproc
from sklearn.metrics.pairwise import cosine_similarity

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set_style("darkgrid")

# Encodings for Each Label

## *source information*

1. SC (Source Correct)
2. CR (Correct Rejection)
3. SI (Source Incorrect)
4. Miss 
5. FA  (False Alarm)

## *label for the source response*

1. RS (Remember Source)
2. RO (Remember Other)
3. F (Familiarity)
4. MN (Maybe New) 
5. SN (Sure New)


In [9]:
# Short codes for the two label sets. The integer codes used in `x_axis` are
# 1-based, so a code `k` refers to element `k - 1` of the matching list.
source_info = ["SC", "CR", "SI", "M", "FA"]
response_info = ["RS", "RO", "F", "MN", "SN"]

# Left-to-right order of the (source code, response code) pairs on the x-axis
# of the projection graph; one row per response code.
x_axis = [
    (1, 1), (3, 1), (5, 1),
    (1, 2), (5, 2),
    (1, 3), (3, 3), (5, 3),
    (4, 4), (2, 4),
    (4, 5), (2, 5),
]

In [10]:
# Show the "<source>-<response>" label of every x-axis position, in order.
for source, resp in x_axis:
    print(source_info[source - 1], response_info[resp - 1], sep="-")

SC-RS
SI-RS
FA-RS
SC-RO
FA-RO
SC-F
SI-F
FA-F
M-MN
CR-MN
M-SN
CR-SN


# Preproc

In [11]:
# Load experiment 1 and build one two-class dataset out of two
# positive/negative index pairs ("multiple group").
file_path = "data_imbalLDA_1.mat"

data_preproc = preproc(file_path, experiment_num=1)
# NOTE(review): the four arguments are presumably (source, response) codes of
# the positive class followed by those of the negative class, i.e. CR-SN vs
# CR-MN here and M-SN vs M-MN on the next line -- confirm against
# mat_preproc.preproc.filter_index.
pos1, neg1 = data_preproc.filter_index(2,5,2,4)
pos2, neg2 = data_preproc.filter_index(4,5,4,4)

# Presumably pools the two pairs into one positive and one negative index set
# (verify in mat_preproc); the pooled indices then select features, labels and
# subject ids.
pos_idx, neg_idx = data_preproc.merge_two_class(pos1, neg1, pos2, neg2)
X, y, subject = data_preproc.get_data_by_index(pos_idx, neg_idx)
X.shape

(3813, 72)

In [12]:
subject.shape

(3813,)

In [13]:
y.shape

(3813,)

In [18]:
# Standardize the features. This rebinds `X` to the scaled matrix, so after
# this cell runs `X` no longer holds the raw features.
sc = StandardScaler()  # standardize features
X = sc.fit_transform(X)
# `ledoit_wolf` returns a (shrunk covariance, shrinkage coefficient) pair;
# the last line displays the estimated shrinkage coefficient.
s = ledoit_wolf(X)
s[1]

0.0024978800314301485

# Replicate Acc based on Kueida's Method

Although we could not find the exact implementation or any record of how Kueida calculated the reported values, we followed the descriptions in the literature as closely as possible and replicated the results as best we could.

First, a leave-one-subject-out cross-validation was set up. Within each fold, an LDA with automatic shrinkage and the eigen solver was fitted on the training data. The model's accuracy was then evaluated on the left-out subject. Since the literature states that “The accuracy of the classifiers were calculated on balanced test data,” whenever the test set (the left-out subject) is imbalanced, we randomly drop observations from the majority class (the class with more observations than the other) and calculate the accuracy on the resulting balanced test set. We repeat this random balancing of the test set 10 times to smooth out the effect of randomness.
 

In [10]:
def cal_acc_balanced(clf, trail_num, shrinkage=None, n_repeats=10):
    """
    Leave-one-subject-out LDA accuracy, scored on class-balanced test folds.

    For every left-out subject an LDA (eigen solver) is fitted on all other
    subjects. The left-out trials are then balanced by randomly dropping
    trials from the larger class, the accuracy on that balanced subset is
    computed, and the random balancing is repeated `n_repeats` times.

    Parameters
    ----------
    clf : str
        Classifier to evaluate, either "SN_MN" or "F_CR".
    trail_num : int
        Experiment number; selects the file `data_imbalLDA_{trail_num}.mat`
        and is forwarded to `preproc`.
    shrinkage : None, "auto" or float, default None
        Forwarded to `LinearDiscriminantAnalysis(shrinkage=...)`.
        NOTE(review): the notebook's description of this method mentions
        automatic shrinkage, whereas the default here (None, which is what
        the reported runs used) applies no shrinkage. Pass
        `shrinkage="auto"` to follow the written description.
    n_repeats : int, default 10
        Number of random re-balancings per left-out subject.

    Returns
    -------
    float
        Mean accuracy over all folds and all repetitions.

    Raises
    ------
    ValueError
        If `clf` is not a known classifier, if `n_repeats` is smaller than 1,
        or if a left-out subject has no trials of one of the two classes (a
        balanced test set cannot be formed in that case).

    Notes
    -----
    The random drops use NumPy's global random state; call `np.random.seed`
    beforehand for repeatable results.
    """
    if n_repeats < 1:
        raise ValueError("n_repeats must be at least 1")

    file_path = f"data_imbalLDA_{trail_num}.mat"
    data_preproc = preproc(file_path, trail_num)
    if clf == "SN_MN":
        pos1, neg1 = data_preproc.filter_index(2,5,2,4)
        pos2, neg2 = data_preproc.filter_index(4,5,4,4)
    elif clf == "F_CR":
        pos1, neg1 = data_preproc.filter_index(1,3,2,4)
        pos2, neg2 = data_preproc.filter_index(3,3,2,5)
    else:
        raise ValueError("Unknown Classifier. Should be either `SN_MN` or `F_CR`")
    pos_idx, neg_idx = data_preproc.merge_two_class(pos1, neg1, pos2, neg2)
    X, y, subject = data_preproc.get_data_by_index(pos_idx, neg_idx)

    scores = []
    for train_idx, test_idx in LeaveOneGroupOut().split(X, y, subject):
        X_train, y_train = X[train_idx, :], y[train_idx]
        X_test, y_test = X[test_idx, :], y[test_idx]
        lda = LinearDiscriminantAnalysis(shrinkage=shrinkage, solver="eigen")
        lda.fit(X_train, y_train)

        # Positions *within the test fold* of each class. Label 1 is treated
        # as the positive class and every other label as negative.
        is_pos = y_test == 1
        test_pos = np.flatnonzero(is_pos)
        test_neg = np.flatnonzero(~is_pos)
        n_pos, n_neg = len(test_pos), len(test_neg)
        if n_pos == 0 or n_neg == 0:
            raise ValueError(
                "A left-out subject has no trials of one class; "
                "cannot build a balanced test set for that fold"
            )

        fold_acc = []
        for _ in range(n_repeats):
            # Randomly thin the larger class down to the size of the smaller
            # one. When both classes have the same size the negative class is
            # still drawn (a random reordering), which keeps the sequence of
            # random draws unchanged.
            if n_pos > n_neg:
                pos_chosen = np.random.choice(test_pos, n_neg, replace=False)
                neg_chosen = test_neg
            else:
                pos_chosen = test_pos
                neg_chosen = np.random.choice(test_neg, n_pos, replace=False)
            # Balance check that does not depend on how labels are encoded.
            assert len(pos_chosen) == len(neg_chosen), "test fold is not balanced"
            balanced = np.concatenate([pos_chosen, neg_chosen])
            fold_acc.append(lda.score(X_test[balanced, :], y_test[balanced]))
        scores.append(fold_acc)
    return np.array(scores).mean()

## Reports of Accuracy

The reported values are shown in the image below. The accuracies we obtained are consistently lower than the reported values.
![reported_table](img/reported_table.png)

In [11]:
# Seed NumPy's global RNG so the random test-set balancing is repeatable.
np.random.seed(42)
# One row per experiment (1, 2); one column per classifier ("SN_MN", "F_CR").
accs = [
    [cal_acc_balanced(clf_name, trial) for clf_name in ("SN_MN", "F_CR")]
    for trial in (1, 2)
]
accs

[[0.5304756678134714, 0.515777909215031],
 [0.5559914866368215, 0.5130120075747037]]

![summary](img/sim_vs_reported.png)

The accuracy reported in the literature is 0.5653.

# Test the AUROC

In [12]:
# Pooled leave-one-subject-out AUROC for experiment 1.
file_path = "data_imbalLDA_1.mat"
data_preproc = preproc(file_path, 1)

# combine SN and MN
pos1, neg1 = data_preproc.filter_index(2, 5, 2, 4)
pos2, neg2 = data_preproc.filter_index(4, 5, 4, 4)
pos_idx, neg_idx = data_preproc.merge_two_class(pos1, neg1, pos2, neg2)
X, y, subject = data_preproc.get_data_by_index(pos_idx, neg_idx)

logo = LeaveOneGroupOut()
scores = []

# Gather the held-out probabilities and labels fold by fold, then compute a
# single AUROC over all of them.
y_pred_prob, y_true = [], []
for train_idx, test_idx in logo.split(X, y, subject):
    X_train = X[train_idx, :]
    y_train = y[train_idx]
    X_test = X[test_idx, :]
    y_test = y[test_idx]
    LDA = LinearDiscriminantAnalysis(solver="eigen", shrinkage="auto")
    LDA.fit(X_train, y_train)
    # Column 1 holds the probability of the second entry of `LDA.classes_`.
    y_pred_prob.append(LDA.predict_proba(X_test)[:, 1])
    y_true.append(y_test)
y_pred_prob = np.concatenate(y_pred_prob)
y_true = np.concatenate(y_true)
roc_auc_score(y_true, y_pred_prob)

0.575871522339723

The literature reports this number as 0.5564.

In [13]:
# Pooled leave-one-subject-out AUROC for experiment 2.
file_path = "data_imbalLDA_2.mat"
data_preproc = preproc(file_path, 2)

# combine SN and MN
pos1, neg1 = data_preproc.filter_index(2, 5, 2, 4)
pos2, neg2 = data_preproc.filter_index(4, 5, 4, 4)
pos_idx, neg_idx = data_preproc.merge_two_class(pos1, neg1, pos2, neg2)
X, y, subject = data_preproc.get_data_by_index(pos_idx, neg_idx)

logo = LeaveOneGroupOut()
scores = []

# Gather the held-out probabilities and labels fold by fold, then compute a
# single AUROC over all of them.
y_pred_prob, y_true = [], []
for train_idx, test_idx in logo.split(X, y, subject):
    X_train = X[train_idx, :]
    y_train = y[train_idx]
    X_test = X[test_idx, :]
    y_test = y[test_idx]
    LDA = LinearDiscriminantAnalysis(solver="eigen", shrinkage="auto")
    LDA.fit(X_train, y_train)
    # Column 1 holds the probability of the second entry of `LDA.classes_`.
    y_pred_prob.append(LDA.predict_proba(X_test)[:, 1])
    y_true.append(y_test)
y_pred_prob = np.concatenate(y_pred_prob)
y_true = np.concatenate(y_true)
roc_auc_score(y_true, y_pred_prob)

0.6373880191345944

The literature reports this number as 0.5997.

# Unbalanced

In [22]:
def cal_acc_unbalanced(clf, trail_num, shrinkage=None):
    """
    Leave-one-subject-out LDA accuracy on the unmodified test folds.

    For every left-out subject an LDA (eigen solver) is fitted on all other
    subjects and scored on all trials of the left-out subject. Unlike
    `cal_acc_balanced`, the test trials are not re-balanced, so each fold's
    accuracy reflects that subject's own class proportions.

    Parameters
    ----------
    clf : str
        Classifier to evaluate, either "SN_MN" or "F_CR".
    trail_num : int
        Experiment number; selects the file `data_imbalLDA_{trail_num}.mat`
        and is forwarded to `preproc`.
    shrinkage : None, "auto" or float, default None
        Forwarded to `LinearDiscriminantAnalysis(shrinkage=...)`. The default
        (None) applies no shrinkage.

    Returns
    -------
    numpy.ndarray of shape (n_folds, 1)
        Accuracy on each left-out subject, one row per fold in the order
        produced by `LeaveOneGroupOut`. Use `.mean()` for the unweighted
        average across subjects.

    Raises
    ------
    ValueError
        If `clf` is not a known classifier.
    """
    file_path = f"data_imbalLDA_{trail_num}.mat"
    data_preproc = preproc(file_path, trail_num)
    if clf == "SN_MN":
        pos1, neg1 = data_preproc.filter_index(2,5,2,4)
        pos2, neg2 = data_preproc.filter_index(4,5,4,4)
    elif clf == "F_CR":
        pos1, neg1 = data_preproc.filter_index(1,3,2,4)
        pos2, neg2 = data_preproc.filter_index(3,3,2,5)
    else:
        raise ValueError("Unknown Classifier. Should be either `SN_MN` or `F_CR`")
    pos_idx, neg_idx = data_preproc.merge_two_class(pos1, neg1, pos2, neg2)
    X, y, subject = data_preproc.get_data_by_index(pos_idx, neg_idx)

    scores = []
    for train_idx, test_idx in LeaveOneGroupOut().split(X, y, subject):
        lda = LinearDiscriminantAnalysis(shrinkage=shrinkage, solver="eigen")
        lda.fit(X[train_idx, :], y[train_idx])
        # Stored as a one-element row so the result keeps its (n_folds, 1)
        # shape, which existing callers may rely on.
        scores.append([lda.score(X[test_idx, :], y[test_idx])])
    return np.array(scores)

In [23]:
# Per-fold accuracies of the SN/MN classifier on experiment 1; the last line
# displays their unweighted mean across left-out subjects.
accs = cal_acc_unbalanced("SN_MN", 1)
accs.mean()

0.5516121079202426

Matlab: 
```
>> mean(accs)

ans =

    0.5463
```

In [26]:
# Per-fold accuracies of the F/CR classifier on experiment 1; the last line
# displays their unweighted mean across left-out subjects. This rebinds
# `accs`, replacing the SN/MN result from the previous cell.
accs = cal_acc_unbalanced("F_CR", 1)
accs.mean()

0.6833936763968906

Matlab: 
```
>> mean(accs)

ans =

    0.5110
```