In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Colab-only: mount Google Drive at /content/gdrive. The dataset CSVs read
# further down live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
import sys
# Add the project folder on Drive to the module search path.
# NOTE(review): presumably for project-local helper modules; none are imported
# in the cells shown here - confirm this is still needed.
sys.path.append('/content/gdrive/MyDrive/Neuromatch Project/Colab Project')

In [4]:
# Utility functions for Model Training and Testing

from sklearn.metrics import confusion_matrix

def precision(target, prediction):
  """Precision of the positive class: TP / (TP + FP).

  Only binary problems are supported: the confusion matrix must flatten to
  exactly four counts, otherwise the unpacking raises ValueError.
  The result is nan (NumPy emits a warning) when nothing was predicted
  positive, i.e. TP + FP == 0.
  """
  # Flattened binary confusion matrix reads TN, FP, FN, TP.
  _, false_pos, _, true_pos = confusion_matrix(target, prediction).ravel()
  predicted_pos = true_pos + false_pos
  return np.divide(true_pos, predicted_pos)

def recall(target, prediction):
  """Recall of the positive class: TP / (TP + FN).

  Only binary problems are supported: the confusion matrix must flatten to
  exactly four counts, otherwise the unpacking raises ValueError.
  The result is nan (NumPy emits a warning) when the targets contain no
  positive sample, i.e. TP + FN == 0.
  """
  # Flattened binary confusion matrix reads TN, FP, FN, TP.
  _, _, false_neg, true_pos = confusion_matrix(target, prediction).ravel()
  actual_pos = true_pos + false_neg
  return np.divide(true_pos, actual_pos)

def F1(target, prediction):
  """F1 score: harmonic mean of precision and recall.

  Returns 0.0 when precision and recall are both 0 (no true positives but
  at least one false positive and one false negative). The plain formula
  would evaluate 0/0 there and yield nan, although F1 is well defined and
  equal to 0 in that situation.

  If precision or recall is itself nan (undefined), the result stays nan.
  """
  pre = precision(target, prediction)
  rec = recall(target, prediction)

  # Guard the 0/0 case; a nan sum compares unequal to 0 and falls through.
  if pre + rec == 0:
    return np.float64(0.0)

  return 2*(np.divide((pre*rec),(pre+rec)))

def FP_ratio(target, prediction):
  """False-positive rate: FP / (FP + TN).

  Only binary problems are supported: the confusion matrix must flatten to
  exactly four counts, otherwise the unpacking raises ValueError.
  The result is nan when the targets contain no negative sample.
  """
  # Flattened binary confusion matrix reads TN, FP, FN, TP.
  true_neg, false_pos, _, _ = confusion_matrix(target, prediction).ravel()
  actual_neg = false_pos + true_neg
  return false_pos/actual_neg

def FN_ratio(target, prediction):
  """False-negative rate: FN / (FN + TP).

  Only binary problems are supported: the confusion matrix must flatten to
  exactly four counts, otherwise the unpacking raises ValueError.
  The result is nan when the targets contain no positive sample.
  """
  # Flattened binary confusion matrix reads TN, FP, FN, TP.
  _, _, false_neg, true_pos = confusion_matrix(target, prediction).ravel()
  actual_pos = false_neg + true_pos
  return false_neg/actual_pos

def accuracy(target, prediction):
  """Fraction of samples whose prediction equals the target.

  Computed as (sum of the confusion-matrix diagonal) / (total count). For
  a binary problem this is exactly (TP + TN) / (TP + TN + FN + FP), but
  unlike a fixed 2x2 unpacking it also works when only one class is
  present in both arrays or when there are more than two classes.
  """
  cm = confusion_matrix(target, prediction)
  # Correct predictions are the diagonal entries of the confusion matrix.
  return np.trace(cm)/np.sum(cm)


def training_model(model, X_tr, y_tr):
  """Fit ``model`` on the training data and return what ``fit`` returns.

  Parameters
  ----------
  model : object exposing ``fit(X, y)``.
  X_tr, y_tr : training features and labels, passed through unchanged.

  Returns
  -------
  Whatever ``model.fit`` returns.
  """
  # Log which model is being trained (message typo "traingin" fixed).
  print("training model :", model)
  return model.fit(X_tr, y_tr)

def evaluate_model(model, X_tr, y_tr, X_te, y_te, model_name, dataset_name):
  """Score a fitted classifier on the train and test splits.

  Prints the test predictions, then accuracy, confusion matrix, precision,
  recall and F1 for both splits, and returns the same scores as a one-row
  DataFrame.

  Parameters
  ----------
  model : fitted estimator exposing ``predict``.
  X_tr, y_tr : training features and labels.
  X_te, y_te : test features and labels.
  model_name, dataset_name : labels copied verbatim into the returned row.

  Returns
  -------
  pandas.DataFrame
    One row with the columns Dataset_name, Model_name and the Train_/Test_
    variants of accuracy, precision, recall and F1_score.
  """
  print("--"*10)
  print("Evaluating model :", model)

  train_prediction = model.predict(X_tr)
  test_prediction = model.predict(X_te)
  print(test_prediction)

  # Every score is computed once and reused for both the printout and the
  # returned row: each metric helper rebuilds the confusion matrix, so
  # calling it a second time only repeated the same work.
  train_accuracy = accuracy(y_tr, train_prediction)
  test_accuracy = accuracy(y_te, test_prediction)
  print("Training accuracy: ", train_accuracy)
  print("Test accuracy: ", test_accuracy)
  print()

  print("Train Confusion Matrix is : ", confusion_matrix(y_tr, train_prediction))
  print("Test Confusion Matrix is : ", confusion_matrix(y_te, test_prediction))
  print()

  train_precision = precision(y_tr, train_prediction)
  test_precision = precision(y_te, test_prediction)
  print("Precision for training is : ", train_precision)
  print("Precision for test is : ", test_precision)
  print()

  train_recall = recall(y_tr, train_prediction)
  test_recall = recall(y_te, test_prediction)
  print("Recall for training is : ", train_recall)
  print("Recall for test is : ", test_recall)
  print()

  train_f1 = F1(y_tr, train_prediction)
  test_f1 = F1(y_te, test_prediction)
  print("F1_Score for training is : ", train_f1)
  print("F1_Score for test is : ", test_f1)
  print("--"*10)

  # Single-element lists make pandas build exactly one row.
  results = {
    'Dataset_name': [dataset_name],
    'Model_name': [model_name],
    'Train_accuracy': [train_accuracy],
    'Test_accuracy': [test_accuracy],
    'Train_precision': [train_precision],
    'Test_precision': [test_precision],
    'Train_recall': [train_recall],
    'Test_recall': [test_recall],
    'Train_F1_score': [train_f1],
    'Test_F1_score': [test_f1],
  }

  return pd.DataFrame(results)


In [34]:
# Empty frame that collects one summary row per evaluated model/dataset.
# Re-running this cell discards every row accumulated so far.
results_df = pd.DataFrame()

In [6]:
# Settings describing how the dataset was produced.
# NOTE(review): no other cell shown in this notebook reads these variables, and
# the hard-coded file names used for loading say "V2" with no PCA tag while this
# cell says version 1 with PCA - confirm which description is current.
test_size = 0.2                    # Value between 0 and 1
version_1_or_2 = True             # True is version 1 with only channels as columns, False is version 2 with channels and time steps in columns
pca_reduction = True               # Are channels reduced to a number of PCA components?
data_processing = 'derivative'            # String describing the data processing procedure before modelling: 'rms', 'derivative' or 'raw'
trials_are_meaned = False          # Are the trials averaged?
num_of_principal_comp = 20         # If pca_reduction, how many principal components are kept
frequency_range = (50, np.nan)     # Frequencies kept; presumably (low, high) with nan meaning no bound - confirm
time_window = [-400, 1600]         # Time window around the stimulus
time_bin_size = 1                     # Size of the time bin
subject_list = [0, 1]                  # Subject numbers; must be a list even if only one subject is wanted
experiment_list = [1, 2, 3]            # Experiment numbers; must be a list even if only one experiment is wanted


In [86]:
# Name the dataset once and derive both file names from it, so the train and
# test files can never point at different dataset variants after an edit.
# (This cell only builds names; the CSVs are read in the loading cell.)
dataset_name = "V2_TimeBin1_DERIVATIVE_Subject0-1_Experiment1-2-3_Time_window--400-1600_Freq-50-nan"
train_file_name = "train_data_" + dataset_name
test_file_name = "test_data_" + dataset_name
model_name = 'logistic regression'
dataset_name

'V2_TimeBin1_DERIVATIVE_Subject0-1_Experiment1-2-3_Time_window--400-1600_Freq-50-nan'

In [87]:
# Read the train and test CSVs from Drive.
path = "/content/gdrive/MyDrive/Neuromatch Project/Datasets/"
train_df = pd.read_csv(f"{path}{train_file_name}.csv")
test_df = pd.read_csv(f"{path}{test_file_name}.csv")

# "target" is the label column; every remaining column is kept as a feature.
X_train, y_train = train_df.drop(columns=["target"]), train_df["target"]
X_test, y_test = test_df.drop(columns=["target"]), test_df["target"]

# Show (rows, columns) of each feature matrix, one per line.
print(X_train.shape, X_test.shape, sep="\n")

(480, 127936)
(120, 127936)


In [37]:
# SMOTE resampling (imblearn) of the training split only; the test split is
# not passed in and stays untouched. random_state is fixed at 42.
# X_train / y_train are overwritten: the original split is gone until the
# loading cell is run again, and re-running this cell resamples data that has
# already been resampled.
# NOTE(review): the evaluation output saved in this notebook shows a 408/72
# training confusion matrix, so the evaluated model appears to have been fit
# without this step - confirm the intended cell order.
from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state=42)
X_train, y_train = sm.fit_resample(X_train,y_train)

In [31]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline fitted on the current training split.
# NOTE(review): log_reg_trained_raw is not used by any other cell shown here.
clf = LogisticRegression(max_iter=1000, random_state=0)
clf.fit(X_train, y_train)
# scikit-learn's fit() returns the estimator itself, so this is the same object.
log_reg_trained_raw = clf

In [88]:
from sklearn.linear_model import LogisticRegression

# Fresh logistic-regression estimator fitted on the current training split;
# this is the model handed to evaluate_model.
clf = LogisticRegression(max_iter=1000, random_state=0)
clf.fit(X_train, y_train)
# scikit-learn's fit() returns the estimator itself, so this is the same object.
log_reg_trained = clf

In [89]:
# Evaluate the fitted model and add its one-row summary to the running table.
# Re-running this cell adds the same row again.
results = evaluate_model(log_reg_trained, X_train, y_train, X_test, y_test, model_name, dataset_name)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement and, like append, keeps each row's original index label (0).
results_df = pd.concat([results_df, results])
results_df

--------------------
Evaluating model : LogisticRegression(max_iter=1000, random_state=0)
[0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0]
Training accuracy:  1.0
Test accuracy:  0.825

Train Confusion Matrix is :  [[408   0]
 [  0  72]]
Test Confusion Matrix is :  [[98  4]
 [17  1]]

Precision for training is :  1.0
Precision for test is :  0.2

Recall for training is :  1.0
Recall for test is :  0.05555555555555555

F1_Score for training is :  1.0
F1_Score for test is :  0.08695652173913045
--------------------


Unnamed: 0,Dataset_name,Model_name,Train_accuracy,Test_accuracy,Train_precision,Test_precision,Train_recall,Test_recall,Train_F1_score,Test_F1_score
0,V2_TimeBin1_RAW_Subject0-1_Experiment1-2-3_Tim...,logistic regression,1.0,0.916667,1.0,0.785714,1.0,0.611111,1.0,0.6875
0,V2_TimeBin1_RAW_Subject0-1_Experiment1-2-3_Tim...,logistic regression SMOTE,1.0,0.916667,1.0,0.785714,1.0,0.611111,1.0,0.6875
0,V2_TimeBin1_RAW_Subject0-1_Experiment1-2-3_Tim...,logistic regression,1.0,0.841667,1.0,0.466667,1.0,0.388889,1.0,0.424242
0,V2_PCA20_TimeBin1_RAW_Subject0-1_Experiment1-2...,logistic regression,1.0,0.775,1.0,0.235294,1.0,0.222222,1.0,0.228571
0,V2_PCA20_TimeBin10_Subject0-1_Experiment1-2-3_...,logistic regression,1.0,0.883333,1.0,0.611111,1.0,0.611111,1.0,0.611111
0,V1_TimeBin1_rms_Subject0-1_Experiment1-2-3_Tim...,logistic regression,0.904167,0.841667,0.90625,0.444444,0.402778,0.222222,0.557692,0.296296
0,V1_TimeBin1_derivative_Subject0-1_Experiment1-...,logistic regression,0.850023,0.850146,1.0,1.0,0.000153,0.000973,0.000306,0.001944
0,V1_TimeBin1_RAW_Subject0-1_Experiment1-2-3_Tim...,logistic regression,0.850471,0.849287,0.527703,0.425974,0.029896,0.013667,0.056586,0.026484
0,V2_TimeBin1_RAW_Subject0_Experiment1_Time_wind...,logistic regression,1.0,0.8,1.0,1.0,1.0,0.2,1.0,0.333333
0,V2_TimeBin1_RAW_Subject0_Experiment2_Time_wind...,logistic regression,1.0,0.7,1.0,0.0,1.0,0.0,1.0,


In [92]:
# Write the summary table to the project's Results folder on Drive.
# The absolute mount point is used (as for every other Drive path in this
# notebook) so the save does not depend on the current working directory.
# Note: this rebinds `path`, which the data-loading cell also assigns.
path = "/content/gdrive/MyDrive/Neuromatch Project/Colab Project/Results/"
# index=False: the row index is 0 for every row and carries no information.
results_df.to_csv(path + "model_summary_table.csv", index=False)