In [1]:
import scipy.io
import pandas as pd
import numpy as np 
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
import scipy.stats as stats

In [2]:
# Read the MATLAB .mat file that holds every variable used in this notebook.
data = scipy.io.loadmat(
    file_name='data_CRM_SN_vs_MN_imbalLDA_order_proj_1.mat'
)

# Show which variable names the file contains.
print(data.keys())

dict_keys(['__header__', '__version__', '__globals__', 'user_class_min_1', 'user_feat_1', 'user_prob_1', 'user_resp_1', 'user_source_1', 'user_tr_order_1', 'user_train_prob_1', 'user_weights_1'])


In [9]:
# Shape of the first response-label array once the leading wrapper
# dimension has been indexed away.
np.shape(data["user_resp_1"][0][0])

(432, 1)

In [10]:
# Every variable in the .mat file carries an unnecessary leading dimension,
# so element [0] is taken from each one. What remains is a sequence with one
# entry per subject (entries differ in length from subject to subject).
tr_order, proj_score, source_label, resp_label, behav_feat = (
    data[mat_key][0]
    for mat_key in (
        'user_tr_order_1',  # trial order
        'user_prob_1',      # projection scores
        'user_source_1',    # source labels
        'user_resp_1',      # response labels
        # features used for training; per the original author these are the
        # channels grouped together and averaged over the windows
        'user_feat_1',
    )
)

# Number of entries (trials) recorded for the first subject.
len(resp_label[0])

432

In [26]:
# Compare the trial-order array shapes of the first two subjects.
np.shape(tr_order[0]), np.shape(tr_order[1])

((432, 1), (134, 1))

In [31]:
# Shape of the first subject's source-label array.
np.shape(source_label[0])

(432, 1)

In [33]:
# Shape of the first subject's projection-score array.
np.shape(proj_score[0])

(432, 1)

# Encodings for Each Label

## *source information*

1. SC (Source Correct)
2. CR (Correct Rejection)
3. SI (Source Incorrect)
4. M (Miss)
5. FA  (False Alarm)

## *label for the source response*

1. RS (Remember Source)
2. RO (Remember Other)
3. F (Familiarity)
4. MN (Maybe New) 
5. SN (Sure New)


In [11]:
# Abbreviations for the integer codes stored in the data. The codes are
# 1-based (1..5, see the "Encodings for Each Label" section), so code k is
# found at list position k - 1.
source_info = ["SC", "CR", "SI", "M", "FA"]
response_info = ["RS", "RO", "F", "MN", "SN"]


def _code_to_name(code, names, kind):
    """Translate a 1-based integer code into its abbreviation from `names`."""
    if not 1 <= code <= len(names):
        raise ValueError(
            "{} code must be between 1 and {}, got {!r}".format(
                kind, len(names), code
            )
        )
    return names[int(code) - 1]


# Function to help prepare the appropriate labels
def prepare_labels(pos_source_label, pos_resp_label,
                   neg_source_label, neg_response_label):
    """
    Prepare the positive and negative class samples for a two-class
    problem carved out of the multi-class source/response labels.

    For every subject, the trials whose (source, response) codes match the
    positive pair or the negative pair are selected; all other trials are
    dropped. The selected rows of all subjects are stacked into flat arrays.

    Reads the module-level variables ``source_label``, ``resp_label`` and
    ``behav_feat`` (one entry per subject) as well as ``source_info`` and
    ``response_info``.

    Parameters:
    -----------
    pos_source_label : int
        the positive class's source code (1-based, see the encodings above)
    pos_resp_label : int
        the positive class's response code (1-based)
    neg_source_label : int
        the negative class's source code (1-based)
    neg_response_label : int
        the negative class's response code (1-based)

    Returns:
    --------
    data_x : np.ndarray
        selected feature rows of all subjects, shape (n_samples, n_features)
    data_y : np.ndarray
        class name of every row in data_x, e.g. "SCRS"; shape (n_samples,)
    data_subject : np.ndarray
        index of the subject every row in data_x came from, usable as the
        ``groups`` argument of a leave-one-subject-out split;
        shape (n_samples,)

    Raises:
    -------
    ValueError
        if any of the four codes lies outside the range of known encodings
    """
    # Parse the codes into a human readable class name (source + response).
    positive_label = (
        _code_to_name(pos_source_label, source_info, "source")
        + _code_to_name(pos_resp_label, response_info, "response")
    )
    negative_label = (
        _code_to_name(neg_source_label, source_info, "source")
        + _code_to_name(neg_response_label, response_info, "response")
    )

    feature_chunks = []
    label_chunks = []
    subject_chunks = []

    # Keep track of each subject number for the later LOSO split.
    for subject, (source, response, behavior_feature) in enumerate(
            zip(source_label, resp_label, behav_feat)):
        source_codes = source.flatten()
        response_codes = response.flatten()

        # Logical intersection picks the trials that carry both the wanted
        # source code and the wanted response code.
        class_masks = (
            (positive_label,
             (source_codes == pos_source_label)
             & (response_codes == pos_resp_label)),
            (negative_label,
             (source_codes == neg_source_label)
             & (response_codes == neg_response_label)),
        )

        for class_name, mask in class_masks:
            selected = behavior_feature[mask, :]
            feature_chunks.append(selected)
            # Typed chunks keep the dtype stable even when nothing matched.
            label_chunks.append(np.full(len(selected), class_name))
            subject_chunks.append(np.full(len(selected), subject, dtype=int))

    if not feature_chunks:
        # No subjects at all: return empty arrays instead of failing in vstack.
        return (np.empty((0, 0)),
                np.array([], dtype=str),
                np.array([], dtype=int))

    # Subjects contribute different numbers of trials, so the chunks must be
    # stacked row-wise; np.array() on the ragged list would not give a
    # regular (n_samples, n_features) array.
    data_x = np.vstack(feature_chunks)
    data_y = np.concatenate(label_chunks)
    data_subject = np.concatenate(subject_chunks)
    return data_x, data_y, data_subject

In [20]:
# Shape of the source-label array for the subject at index 3.
np.shape(source_label[3])

(107, 1)