In [None]:
# Backend setup. The three magics run in order, so the last %matplotlib call
# decides where figures created by later cells are rendered.
# 1) Select matplotlib's Qt backend.
%matplotlib qt
# 2) Enable IPython's Qt5 GUI event-loop integration.
%gui qt5
# 3) Switch matplotlib to the inline backend (figures are embedded in the notebook).
# NOTE(review): this overrides the Qt backend selected above; presumably the Qt
# lines are only kept for interactive MNE windows -- confirm they are still needed.
%matplotlib inline

In [None]:
import pandas as pd
import numpy as np
import os
import os.path as op

from os.path import exists
import csv
import matplotlib.pyplot as plt
import mne

from mne.decoding import SlidingEstimator
from sklearn.utils import shuffle
import sklearn
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import ElasticNet
import batch_average as ba

# to save trained models and load using for making predictions
from joblib import dump, load


import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics

### Importing preprocessed data - epochs

In [None]:
# Input / output locations used by the rest of the notebook.
# NOTE(review): `overwrite` is not read by any of the visible cells.
overwrite = True

# Root of the project tree. Defaults to the original location; set the
# VR2FEM_ROOT environment variable to run the notebook on another machine
# without editing this cell.
project_root = os.environ.get('VR2FEM_ROOT', r'D:\Master_thesis')
analysis_name = 'Stereovsmono'

# Per-subject folders with the pre-processed train/test epoch files.
eeg_path = os.path.join(project_root, 'Pre-processed_data')
# Output folders, one per kind of artefact, all scoped to this analysis.
data_path_out = os.path.join(project_root, 'Figures', analysis_name)
data_path_out_scores = os.path.join(project_root, 'Scores', analysis_name)
data_path_out_predictions = os.path.join(project_root, 'Predictions', analysis_name)
path_model_output = os.path.join(project_root, 'TrainedModels', analysis_name)

# Multiclass

In [None]:
# --- Hyper-parameter grid -------------------------------------------------
# Every (l1_ratio, C) pair is handed to LogisticRegression in the training loop.
l1_ratio = [0.1, 0.01]
C = [0.5]
# NOTE(review): `scoring` is not read by any of the visible cells.
scoring = 'accuracy'

# When True a model is fitted even if a model file with the same name exists.
retrain_model = True

# --- Data selection -------------------------------------------------------
# Train/test split numbers 1..5; they appear in the epoch file names.
# (Shrink this to a single split for a quick test run.)
n_split = np.arange(1, 6, dtype=int)

# Participants included in the analysis.
# (Shrink this to a single participant for a quick test run.)
subjects = [
    'VR2FEM_S01', 'VR2FEM_S02', 'VR2FEM_S03', 'VR2FEM_S04',
    'VR2FEM_S05', 'VR2FEM_S06', 'VR2FEM_S07', 'VR2FEM_S08',
    'VR2FEM_S10', 'VR2FEM_S11', 'VR2FEM_S12', 'VR2FEM_S14',
    'VR2FEM_S15', 'VR2FEM_S17', 'VR2FEM_S18', 'VR2FEM_S21',
]

# One list of epoch selection keys per viewing type (stereo first, then mono).
viewing_conditions = (
    ["stereo/happy", "stereo/neutral", "stereo/angry", "stereo/surprised"],
    ["mono/happy", "mono/neutral", "mono/angry", "mono/surprised"],
)

# --- Result bookkeeping ---------------------------------------------------
# Empty table with one column per value recorded for each evaluated split.
monitor_columns = [
    "subject_id",
    "viewing_condition",
    "viewing_type",
    "l1_ratio",
    "cs",
    "split",
    "scores",
    "predictions",
    "y_test",
]
monitor_df = pd.DataFrame(columns=monitor_columns)

# ---------------------------------------------------------------------------
# Train and evaluate one elastic-net logistic-regression classifier for every
# subject x viewing condition x (l1_ratio, C) pair x train/test split.
#
# For each split the epochs are cropped to a fixed time window and averaged
# over time, giving one feature per channel. The class label is the last digit
# of the event code. Every evaluated split adds one record to `monitor_df`.
# Fitted models are written to `path_model_output/<subject>/`; when
# `retrain_model` is False an existing model file is loaded instead of refitted.
# ---------------------------------------------------------------------------
crop_tmin, crop_tmax = 0.14, 0.20  # bounds handed to Epochs.crop before averaging over time

# Records are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop.
monitor_records = []

for subject in subjects:
    # directory management
    model_dir_save = os.path.join(path_model_output, subject)
    os.makedirs(model_dir_save, exist_ok=True)

    data_path = os.path.join(eeg_path, subject)

    # reading preprocessed data - epochs (one train and one test file per split)
    epochs_train_list = []
    epochs_test_list = []

    print("----------------1.1 Start Reading of Data ----------------")
    for items in n_split:
        epochs_train_name = os.path.join(data_path, f"{subject}-preprocessed_train_{items}-epo.fif")
        epochs_test_name = os.path.join(data_path, f"{subject}-preprocessed_test_{items}-epo.fif")
        epochs_train_list.append(mne.read_epochs(epochs_train_name, preload=True))
        epochs_test_list.append(mne.read_epochs(epochs_test_name, preload=True))
    print("----------------1.1 End Reading of Data ----------------")

    for view in viewing_conditions:
        print("----------------1.2 Pipeline Construction ----------------")
        print(f"Viewing Condition: {view}")
        # "stereo/happy" -> "stereo"
        viewing_type = view[0].split("/")[0]

        # One entry per (l1_ratio, C) pair, in iteration order.
        scores_avg_all = []
        predictions_all = []
        y_test_all = []

        for l1 in l1_ratio:
            for cs in C:
                print("----------------Start Split ----------------")

                scores_all = []
                y_test_split = []
                predictions_split = []

                # `n` is the position of the split in the epoch lists filled above;
                # enumerate keeps both aligned even if n_split does not start at 1.
                for n, split in enumerate(n_split):
                    # The viewing type is part of the name so that the stereo and
                    # mono models of one subject do not share a file.
                    model_output_file_name = os.path.join(
                        model_dir_save,
                        f"{subject}_{viewing_type}_LogisticsRegression_L1_{l1}_C_{cs}_split_{split}.joblib",
                    )
                    model_exists = exists(model_output_file_name)
                    print(f"model_exists {model_exists} for {model_output_file_name}")

                    # Test features/labels are needed for fresh and cached models alike.
                    epochs_test = epochs_test_list[n][view].copy().crop(crop_tmin, crop_tmax)
                    x_test = epochs_test.get_data().mean(axis=2)  # average over the time axis
                    y_test = [int(str(yy)[-1]) for yy in epochs_test.events[:, 2]]

                    if model_exists and not retrain_model:
                        print(f"Trained Model exists file name: {model_output_file_name}")
                        # joblib files are pickles: only load files written by this notebook.
                        clf = load(model_output_file_name)
                    else:
                        print(f"Train/test split: {split}")
                        print("Running reg. ratio for: ", l1)
                        print("Running reg. strength for: ", cs)
                        print(f"Subject: {subject} Number of test/train split: {split} "
                              f"for L1 ratio : {l1} and strength: {cs}")

                        epochs_train = epochs_train_list[n][view].copy().crop(crop_tmin, crop_tmax)
                        x_train = epochs_train.get_data().mean(axis=2)
                        y_train = [int(str(yy)[-1]) for yy in epochs_train.events[:, 2]]

                        # NOTE(review): `multi_class` is deprecated in recent scikit-learn
                        # releases; OneVsRestClassifier is the documented replacement.
                        clf = make_pipeline(
                            StandardScaler(),
                            LogisticRegression(multi_class='ovr', max_iter=4000, solver='saga',
                                               penalty='elasticnet', l1_ratio=l1, C=cs,
                                               random_state=0),  # saga is stochastic; fix the seed
                        )
                        clf.fit(x_train, y_train)

                        print(f"----------------Saving Trained Models to files: {model_output_file_name} ----------------")
                        dump(clf, model_output_file_name)

                    predictions = clf.predict(x_test)
                    scores = clf.score(x_test, y_test)
                    print("---scores---", scores)
                    scores_all.append(scores)
                    predictions_split.append(predictions)
                    y_test_split.append(y_test)

                    monitor_records.append({
                        "subject_id": subject,
                        "viewing_condition": view,
                        "viewing_type": viewing_type,
                        "l1_ratio": l1,
                        "cs": cs,
                        "split": split,
                        "scores": scores,
                        "predictions": predictions,
                        "y_test": y_test,
                    })

                if len(scores_all) > 0:
                    print("----------------Ending Split ----------------")
                    scores_avg = float(np.mean(scores_all))
                    print("scores_avg", scores_avg)
                    scores_avg_all.append(scores_avg)
                    y_test_all.append(y_test_split)
                    predictions_all.append(predictions_split)

        if not scores_avg_all:
            # Nothing was evaluated (e.g. empty n_split or empty grid).
            print(f"No scores available for {subject} / {viewing_type}")
            continue

        # argmax returns the first maximum, so tied configurations cannot break the lookup.
        best_model_index = int(np.argmax(scores_avg_all))
        scores_avg_best = scores_avg_all[best_model_index]
        print(scores_avg_all)
        print("scores_all", scores_avg_best)
        print("best_model_index", best_model_index)

        # Pool the splits of the best (l1_ratio, C) configuration.
        predictions_best_model = np.hstack(predictions_all[best_model_index])
        y_test_best_model = np.hstack(y_test_all[best_model_index])

# Build the monitoring table in one go, keeping the column order declared above.
monitor_df = pd.DataFrame(monitor_records, columns=monitor_df.columns)
        


In [None]:
# Collapse the per-split records into one row per subject, viewing type and
# hyper-parameter setting: the score is averaged over the splits, while the
# predictions and true labels of all splits are concatenated.
model_keys = ['subject_id', 'viewing_type', 'l1_ratio', 'cs']
subject_models_df = (
    monitor_df
    .groupby(model_keys)
    .agg(
        scores=('scores', 'mean'),
        predictions=('predictions', np.hstack),
        y_test=('y_test', np.hstack),
    )
    .reset_index()
)
subject_models_df

In [None]:
# -------------------------- best model logic per subject and viewing type---------------------------

# Boolean mask (aligned with subject_models_df) marking the best-scoring
# hyper-parameter setting of every subject / viewing type.
# idxmax yields exactly one row label per group (the first one holding the
# maximum), so a tie between two settings no longer selects both rows, which
# would count that subject's predictions twice when results are pooled.
best_rows = subject_models_df.groupby(['subject_id', 'viewing_type'])['scores'].idxmax()
best_model_index = pd.Series(
    subject_models_df.index.isin(best_rows),
    index=subject_models_df.index,
)

subject_models_df[best_model_index]

# subject_models_df.head()

In [None]:
# -------------------------- combine results for all subjects based on viewing type---------------------------

# Pool the selected per-subject models by viewing type: mean of the subject
# scores, and all predictions / true labels concatenated into one array each.
combine_model_output = (
    subject_models_df
    .loc[best_model_index]
    .groupby(['viewing_type'])
    .agg(
        scores=('scores', 'mean'),
        predictions=('predictions', np.hstack),
        y_test=('y_test', np.hstack),
    )
    .reset_index()
)
combine_model_output

# subject_models_df[best_model_index]

In [None]:
# Draw one confusion-matrix heatmap per viewing type from the pooled true
# labels and predictions; the pooled mean score is shown in the title.
viewing_types = combine_model_output["viewing_type"].unique()
for view in viewing_types:
    print("Running confusion matrix for: ", view)
    final_cm_df = combine_model_output.loc[combine_model_output["viewing_type"] == view]

    # Only the first matching row is used.
    y_test = final_cm_df['y_test'].iloc[0]
    predictions = final_cm_df['predictions'].iloc[0]
    cm = metrics.confusion_matrix(y_test, predictions)
    print(cm)

    final_cm_score = round(final_cm_df['scores'].tolist()[0], 3)
    all_sample_title = f'Accuracy Score for viewing type {view}: {final_cm_score}'

    fig, ax = plt.subplots(figsize=(9, 9))
    sns.heatmap(cm, annot=True, fmt=".3f", linewidths=.5, square=True, cmap='Blues_r', ax=ax)
    ax.set_ylabel('Actual label')
    ax.set_xlabel('Predicted label')
    ax.set_title(all_sample_title, size=15)