In [45]:
import scipy.io
import pandas as pd
import numpy as np 
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, cross_val_predict
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, roc_auc_score
import scipy.stats as stats
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

from mat_preproc import preproc

import matplotlib.pyplot as plt
import seaborn as sns

# Render matplotlib figures inline in the notebook, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format='retina'
sns.set_style("darkgrid")

# Start a MATLAB engine session. `eng` is used by later cells to call the
# MATLAB function LDA_simplest_test so its results can be compared with
# scikit-learn's.
import matlab.engine
eng = matlab.engine.start_matlab()

# Encodings for Each Label

## *source information*

1. SC (Source Correct)
2. CR (Correct Rejection)
3. SI (Source Incorrect)
4. M (Miss)
5. FA  (False Alarm)

## *label for the source response*

1. RS (Remember Source)
2. RO (Remember Other)
3. F (Familiarity)
4. MN (Maybe New) 
5. SN (Sure New)


In [46]:
# Label abbreviations, listed in the same order as the numbered encodings in
# the markdown legend above (position i holds the label for code i + 1).
source_info = [
    "SC",  # Source Correct
    "CR",  # Correct Rejection
    "SI",  # Source Incorrect
    "M",   # Miss
    "FA",  # False Alarm
]
response_info = [
    "RS",  # Remember Source
    "RO",  # Remember Other
    "F",   # Familiarity
    "MN",  # Maybe New
    "SN",  # Sure New
]

# Entries for the x-axis of the projection graph, written one row per value of
# the second tuple element.
# NOTE(review): each pair looks like (source code, response code) -- confirm.
x_axis = [
    (1, 1), (3, 1), (5, 1),
    (1, 2), (5, 2),
    (1, 3), (3, 3), (5, 3),
    (4, 4), (2, 4),
    (4, 5), (2, 5),
]

# Summary Function

In [47]:
def clf_summary(clf, exp):
    """Fit an LDA classifier on one experiment's data and print a short summary.

    The classifier name is validated before anything else, so an unknown name
    fails immediately instead of after the experiment file has been loaded
    and preprocessed.

    Parameters
    ----------
    clf : str
        Which classifier to build: ``"SN_vs_MN"`` or ``"F_vs_CR"``.
    exp
        Experiment identifier (the notebook passes 1 or 2). It is interpolated
        into the data file name ``data_imbalLDA_{exp}.mat`` and also passed to
        ``preproc``.

    Returns
    -------
    tuple
        ``(LDA.coef_, LDA.intercept_)`` of the fitted
        ``LinearDiscriminantAnalysis`` model.

    Raises
    ------
    ValueError
        If ``clf`` is not one of the supported classifier names.
    """
    # Each classifier is assembled from two filter_index() selections that are
    # then merged into one positive and one negative index set.
    # NOTE(review): the meaning of the four filter_index arguments is defined
    # in mat_preproc and is not visible here -- confirm against that module.
    if clf == "SN_vs_MN":
        first_filter, second_filter = (2, 5, 2, 4), (4, 5, 4, 4)
    elif clf == "F_vs_CR":
        first_filter, second_filter = (1, 3, 2, 4), (3, 3, 2, 5)
    else:
        raise ValueError("Unknown Classifier")

    file_path = f"data_imbalLDA_{exp}.mat"
    data_preproc = preproc(file_path, exp)
    pos1, neg1 = data_preproc.filter_index(*first_filter)
    pos2, neg2 = data_preproc.filter_index(*second_filter)
    pos_idx, neg_idx = data_preproc.merge_two_class(pos1, neg1, pos2, neg2)
    # The third returned value (bound to `subject` in the commented-out
    # projection code below this function) is not needed for the fit.
    X, y, _ = data_preproc.get_data_by_index(pos_idx, neg_idx, eliminate_trails = False)
    print(f"Summary for Clf: {clf}, Exp: {exp}")
    print(f"the shape of the training features is {X.shape}")

    ## use shrinkage 0 to fit the whole data and test on the whole data
    ## different solvers give the same result
    LDA = LinearDiscriminantAnalysis(shrinkage = 0, solver = 'eigen')
    LDA.fit(X, y)
    # The model is scored on the same X, y it was fitted on, so the printed
    # number is in-sample (training) accuracy.
    print(f"the accuracy for experiment {exp} {clf} is {LDA.score(X, y)}")
    return LDA.coef_, LDA.intercept_

#     ## Projection graph
#     pos_idx = [10, 11]
#     neg_idx = [8, 9]
#     data_preproc.generate_projections(LDA, pos_idx, neg_idx, X, y, subject)
#     plt.title(f"{clf} Projection Experiment {exp}", fontsize = 12)
    

## Testing different classifiers and experiments

### SN vs MN, Exp 1

In [48]:
# Python & Sklearn: fit scikit-learn's LDA for SN vs MN on experiment 1 and keep
# the returned coefficients (W_python) and intercept (b).
W_python, b = clf_summary("SN_vs_MN", 1)

Summary for Clf: SN_vs_MN, Exp: 1
the shape of the training features is (3898, 72)
the accuracy for experiment 1 SN_vs_MN is 0.616213442791175


In [50]:
# Weight vector to compare against the scikit-learn coefficients: 72 values,
# one per line, parsed directly into a float array.
# NOTE(review): presumably pasted from the MATLAB run's output -- confirm.
W_matlab = np.array([float(entry) for entry in """-0.0083
0.0062
0.0015
0.0099
-0.0198
0.0017
-0.0079
0.0133
0.0005
0.0020
-0.0145
-0.0052
0.0005
-0.0004
0.0206
-0.0220
-0.0152
0.0169
0.0185
0.0016
-0.0048
-0.0135
0.0184
0.0002
0.0165
-0.0077
-0.0169
0.0232
0.0225
-0.0110
-0.0030
-0.0155
0.0030
0.0019
0.0365
-0.0212
-0.0058
0.0174
0.0001
-0.0114
-0.0277
0.0338
-0.0082
-0.0120
0.0105
0.0097
-0.0116
-0.0053
-0.0006
-0.0123
-0.0135
0.0118
0.0145
-0.0194
0.0012
0.0136
-0.0176
0.0115
0.0034
0.0035
-0.0038
0.0016
0.0181
-0.0293
-0.0225
0.0224
0.0016
-0.0030
-0.0120
0.0067
0.0156
-0.0143""".split("\n")])

In [51]:
# Cosine similarity between the MATLAB and scikit-learn weight vectors. Each
# vector is reshaped to a single row because cosine_similarity works on 2-D
# (n_samples, n_features) input; the result is a 1x1 array.
cosine_similarity(W_matlab.reshape(1, -1), W_python[0].reshape(1, -1))

array([[0.99362536]])

In [6]:
# Matlab & Kueida: run the MATLAB function LDA_simplest_test for SN vs MN on
# experiment 1 through the engine; nargout=1 requests a single return value.
eng.LDA_simplest_test("SN_vs_MN", 1, nargout=1)

The shape of the training set is
        3898          72

    0.0147

   -0.0178

the accuracy for experiment 1 SN_vs_MN is: 0.60749


0.6074910210364289

In [63]:
# `norm_mat` was not assigned in any visible cell, so this cell raised
# NameError on a fresh kernel. The recorded output (-0.00216176) equals
# -0.0178 times the L2 norm of W_matlab (~0.121447), so norm_mat is rebuilt
# here as that norm, kept as a 1-element array to match the recorded output.
# NOTE(review): confirm this matches how norm_mat was originally computed.
norm_mat = np.linalg.norm(W_matlab.reshape(1, -1), axis=1)
# -0.0178 is the second value printed by the MATLAB run for SN vs MN, Exp 1
# -- presumably its bias term; confirm.
-0.0178 * norm_mat

array([-0.00216176])

### SN vs MN, Exp 2

In [7]:
# Python & Sklearn: scikit-learn LDA fit for SN vs MN on experiment 2. The
# result is not assigned, so the notebook displays the returned
# (coefficients, intercept) tuple.
clf_summary("SN_vs_MN", 2)

Summary for Clf: SN_vs_MN, Exp: 2
the shape of the training features is (3133, 72)
the accuracy for experiment 2 SN_vs_MN is 0.6878391318225343


(array([[ 0.00627465, -0.00013868, -0.03199199,  0.00933314,  0.03954628,
         -0.03907256, -0.00276625, -0.03031232,  0.08160397, -0.11741294,
         -0.06031987,  0.07818503, -0.00772073,  0.02081049, -0.04397208,
          0.02613238,  0.00519555, -0.04119397, -0.00850628, -0.00480383,
          0.02968498,  0.02922219,  0.01466512, -0.02572199,  0.03444012,
          0.01984476, -0.00993128, -0.02591995,  0.05254174,  0.02932783,
          0.00734244,  0.01122934, -0.00519775,  0.01805797,  0.02168663,
         -0.00163178, -0.00145729,  0.00825575, -0.06933359,  0.07217184,
          0.02885868, -0.04805951, -0.00304482,  0.01623376,  0.01142381,
         -0.01492808, -0.03604686,  0.04691276, -0.01103037, -0.01756308,
          0.02244621, -0.02760534,  0.00450689, -0.01328483, -0.00395701,
         -0.01809876, -0.03812328,  0.05908179,  0.02733794, -0.0378579 ,
          0.0093278 ,  0.0101844 ,  0.00700879, -0.03546789, -0.01142127,
          0.03638002, -0.02373569, -0.

In [8]:
# Matlab & Kueida: run the MATLAB function LDA_simplest_test for SN vs MN on
# experiment 2 through the engine; nargout=1 requests a single return value.
eng.LDA_simplest_test("SN_vs_MN", 2, nargout=1)

The shape of the training set is
        3133          72

    0.0115

   -0.0102

the accuracy for experiment 2 SN_vs_MN is: 0.61985


0.6198531758697734

### F vs CR, Exp 1

In [None]:
# Python & Sklearn: scikit-learn LDA fit for F vs CR on experiment 1. The
# result is not assigned, so the notebook displays the returned
# (coefficients, intercept) tuple.
clf_summary("F_vs_CR", 1)

In [None]:
# Matlab & Kueida: run the MATLAB function LDA_simplest_test for F vs CR on
# experiment 1 through the engine; nargout=1 requests a single return value.
eng.LDA_simplest_test("F_vs_CR", 1, nargout=1)

### F vs CR, Exp 2

In [None]:
# Python & Sklearn: scikit-learn LDA fit for F vs CR on experiment 2. The
# result is not assigned, so the notebook displays the returned
# (coefficients, intercept) tuple.
clf_summary("F_vs_CR", 2)

In [None]:
# Matlab & Kueida: run the MATLAB function LDA_simplest_test for F vs CR on
# experiment 2 through the engine; nargout=1 requests a single return value.
eng.LDA_simplest_test("F_vs_CR", 2, nargout=1)