In [1]:
import scipy.io
import pandas as pd
import numpy as np 
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, cross_val_predict
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, roc_auc_score
import scipy.stats as stats
from mat_preproc import preproc

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
%config InlineBackend.figure_format='retina'
# Use seaborn's "darkgrid" theme for the figures drawn in this notebook.
sns.set_style("darkgrid")

# Start a MATLAB session through the MATLAB Engine API for Python.
# `eng` is the handle the "Matlab" cells below use to call LDA_simplest_test,
# so this cell must run (and MATLAB must be available) before those cells.
import matlab.engine
eng = matlab.engine.start_matlab()

# Encodings for Each Label

## *source information*

1. SC (Source Correct)
2. CR (Correct Rejection)
3. SI (Source Incorrect)
4. M (Miss)
5. FA  (False Alarm)

## *label for the source response*

1. RS (Remember Source)
2. RO (Remember Other)
3. F (Familiarity)
4. MN (Maybe New) 
5. SN (Sure New)


In [2]:
# Short label names, listed in the same order as the encoding tables above,
# so position i holds the label whose numeric code is i + 1.
source_info = [
    "SC",  # 1
    "CR",  # 2
    "SI",  # 3
    "M",   # 4
    "FA",  # 5
]
response_info = [
    "RS",  # 1
    "RO",  # 2
    "F",   # 3
    "MN",  # 4
    "SN",  # 5
]

# Order of the categories along the x-axis of the projection graph.
# NOTE(review): each pair looks like (source code, response code) -- confirm
# against mat_preproc. Rows below are grouped by the second element.
x_axis = [
    (1, 1), (3, 1), (5, 1),
    (1, 2), (5, 2),
    (1, 3), (3, 3), (5, 3),
    (4, 4), (2, 4),
    (4, 5), (2, 5),
]

# Summary Function

In [3]:
def clf_summary(clf, exp):
    """Fit a plain LDA on one binary contrast and print its training accuracy.

    The data file ``data_imbalLDA_{exp}.mat`` is loaded through ``preproc``,
    the trials belonging to the requested contrast are selected, an LDA with
    ``shrinkage=0`` is fitted on all of them, and the accuracy on those same
    trials is printed. There is no train/test split, so the printed number is
    training accuracy.

    Parameters
    ----------
    clf : str
        Name of the contrast: ``"SN_vs_MN"`` or ``"F_vs_CR"``.
    exp
        Experiment identifier (the notebook uses 1 and 2). It is inserted into
        the data file name and passed on to ``preproc``.

    Returns
    -------
    None
        Results are reported with ``print`` only.

    Raises
    ------
    ValueError
        If ``clf`` is not a known contrast. This is checked before any data is
        loaded, so a mistyped name fails immediately.
    """
    # Each contrast is assembled from two filter_index selections that are
    # merged further down.
    # NOTE(review): the four arguments look like (source, response) codes for
    # the positive class followed by the negative class -- confirm in mat_preproc.
    filter_args = {
        "SN_vs_MN": ((2, 5, 2, 4), (4, 5, 4, 4)),
        "F_vs_CR": ((1, 3, 2, 4), (3, 3, 2, 5)),
    }
    # Validate first: there is no point reading the .mat file for a contrast
    # we cannot build. The isinstance guard keeps unhashable inputs on the
    # ValueError path instead of failing inside the dict lookup.
    if not isinstance(clf, str) or clf not in filter_args:
        raise ValueError("Unknown Classifier")
    first_args, second_args = filter_args[clf]

    file_path = f"data_imbalLDA_{exp}.mat"
    data_preproc = preproc(file_path, exp)
    pos1, neg1 = data_preproc.filter_index(*first_args)
    pos2, neg2 = data_preproc.filter_index(*second_args)
    pos_idx, neg_idx = data_preproc.merge_two_class(pos1, neg1, pos2, neg2)
    X, y, subject = data_preproc.get_data_by_index(pos_idx, neg_idx, eliminate_trails = False)
    print(f"Summary for Clf: {clf}, Exp: {exp}")
    print(f"the shape of the training features is {X.shape}")

    # shrinkage=0 means no covariance regularisation; fit and score on the
    # full data set. (Original author's note: different solvers give the
    # same result.)
    lda = LinearDiscriminantAnalysis(shrinkage = 0, solver = 'eigen')
    lda.fit(X, y)
    print(f"the accuracy for experiment {exp} {clf} is {lda.score(X, y)}")

    # TODO: the projection plot is currently disabled. It used to call
    #   data_preproc.generate_projections(lda, [10, 11], [8, 9], X, y, subject)
    # followed by plt.title(f"{clf} Projection Experiment {exp}", fontsize = 12).
    # The hard-coded x_axis positions [10, 11] / [8, 9] only correspond to the
    # SN_vs_MN contrast, so they must be made per-contrast before re-enabling.

## Testing different classifiers and experiment numbers

### SN vs MN, Exp 1

In [4]:
# Python & Sklearn: training-set accuracy of the SN vs MN classifier, experiment 1
clf_summary("SN_vs_MN", 1)

Summary for Clf: SN_vs_MN, Exp: 1
the shape of the training features is (3898, 72)
the accuracy for experiment 1 SN_vs_MN is 0.616213442791175


In [5]:
# Matlab & Kueida: the same contrast and experiment, computed by the MATLAB
# function LDA_simplest_test for comparison with the scikit-learn cell above.
# nargout=1 asks the engine for one return value, which the cell displays.
eng.LDA_simplest_test("SN_vs_MN", 1, nargout=1)

The shape of the training set is
        3898          72

the accuracy for experiment 1 SN_vs_MN is: 0.60749


0.6074910210364289

### SN vs MN, Exp 2

In [6]:
# Python & Sklearn: training-set accuracy of the SN vs MN classifier, experiment 2
clf_summary("SN_vs_MN", 2)

Summary for Clf: SN_vs_MN, Exp: 2
the shape of the training features is (3133, 72)
the accuracy for experiment 2 SN_vs_MN is 0.6878391318225343


In [7]:
# Matlab & Kueida: the same contrast and experiment, computed by the MATLAB
# function LDA_simplest_test for comparison with the scikit-learn cell above.
# nargout=1 asks the engine for one return value, which the cell displays.
eng.LDA_simplest_test("SN_vs_MN", 2, nargout=1)

The shape of the training set is
        3133          72

the accuracy for experiment 2 SN_vs_MN is: 0.61985


0.6198531758697734

### F vs CR, Exp 1

In [8]:
# Python & Sklearn: training-set accuracy of the F vs CR classifier, experiment 1
clf_summary("F_vs_CR", 1)

Summary for Clf: F_vs_CR, Exp: 1
the shape of the training features is (3703, 72)
the accuracy for experiment 1 F_vs_CR is 0.7051039697542533


In [9]:
# Matlab & Kueida: the same contrast and experiment, computed by the MATLAB
# function LDA_simplest_test for comparison with the scikit-learn cell above.
# nargout=1 asks the engine for one return value, which the cell displays.
eng.LDA_simplest_test("F_vs_CR", 1, nargout=1)

The shape of the training set is
        3703          72

the accuracy for experiment 1 F_vs_CR is: 0.6087


0.6086956521739131

### F vs CR, Exp 2

In [10]:
# Python & Sklearn: training-set accuracy of the F vs CR classifier, experiment 2
clf_summary("F_vs_CR", 2)

Summary for Clf: F_vs_CR, Exp: 2
the shape of the training features is (3326, 72)
the accuracy for experiment 2 F_vs_CR is 0.7384245339747444


In [11]:
# Matlab & Kueida: the same contrast and experiment, computed by the MATLAB
# function LDA_simplest_test for comparison with the scikit-learn cell above.
# nargout=1 asks the engine for one return value, which the cell displays.
eng.LDA_simplest_test("F_vs_CR", 2, nargout=1)

The shape of the training set is
        3326          72

the accuracy for experiment 2 F_vs_CR is: 0.59321


0.5932050511124474