<a href="https://colab.research.google.com/github/Loleiying/HIPPOCAMPAL-OSCILLATORY-PATTERNS-DURING-ASSOCIATE-RECOGNITION-MEMORY/blob/master/u_med_hack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# import libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report

In [0]:
# read data
egg_en = pd.read_csv("encoding_filtered2to100Hz_cep.csv")
egg_re = pd.read_csv("retrieval_filtered2to100Hz_cep.csv")

In [0]:
# take only the first 300 attributes for model training
X_encoding = egg_en.iloc[:,1:301]
y_encoding_target = egg_en.iloc[:,-1]
X_retri = egg_re.iloc[:,1:301]
y_retri_target = egg_re.iloc[:,-1]

In [0]:
# get classification report
def get_report(y_true, y_pred):
  target_names = ['Intact-Rearranged', 'Intact-Intact']
  return classification_report(y_true, y_pred, target_names=target_names)

# train and test the model
def test(model, df, X, Y):
  score = []
  # KFold
  kf = KFold(n_splits=5)
  for train_index, test_index in kf.split(df.index):
    X_train, X_test, y_train, y_test = X.values[train_index], X.values[test_index], Y.values[train_index], Y.values[test_index]
    model.fit(X_train, y_train) # train the model 
    score.append(model.score(X_test, y_test)) # score of testing
    ypred = model.predict(X_test) # prediction
    ag = ~(ypred==y_test)
    wrong_ag.extend(test_index[ag])
    print(get_report(y_test, ypred)) # get classification report
  print("Mean score", np.array(score).mean()) # mean score of the model
  return wrong_ag

In [0]:
# Downsampling
from sklearn.utils import resample
intact_intact_encoding = egg_en[egg_en['cat']==1] # majority
intact_rearranged_encoding = egg_en[egg_en['cat']==0] # minority 
df_majority = intact_intact_encoding
df_minority = intact_rearranged_encoding

df_majority_downsample = resample(df_majority, 
                                 replace=True,     
                                 n_samples=2500,    
                                 random_state=123) 
df_train_en = pd.concat([df_majority_downsample, df_minority])
# Display new class counts
# print (df_train_re.cat.value_counts())

intact_intact_re = egg_re[egg_re['cat']==1]
intact_rearranged_re = egg_re[egg_re['cat']==0]
df_majority = intact_intact_re
df_minority = intact_rearranged_re

df_majority_downsample = resample(df_majority, 
                                 replace=True,     
                                 n_samples=2500,    
                                 random_state=123) 
df_train_re = pd.concat([df_majority_downsample, df_minority])

# Display new class counts
# print(df_train.cat.value_counts())

X_en = df_train_en.iloc[:,1:301]
y_en = df_train_en.iloc[:,-1]
X_re = df_train_re.iloc[:,1:301]
y_re = df_train_re.iloc[:,-1]

# downsampling testing
rf = RandomForestClassifier(n_estimators=50)
wrong_ag_en = test(rf, df_train_en, X_en, y_en)
len(wrong_ag_en)
#np.savetxt("wrong_encoding.csv", wrong_ag_en, delimiter=",", fmt='%d')
wrong_ag_re = test(rf, df_train_re, X_re, y_re)
len(wrong_ag_re)
#np.savetxt("wrong_retrieval.csv", wrong_ag_re, delimiter=",", fmt='%d')

  'recall', 'true', average, warn_for)


                   precision    recall  f1-score   support

Intact-Rearranged       0.00      0.00      0.00         0
    Intact-Intact       1.00      0.21      0.34      1002

         accuracy                           0.21      1002
        macro avg       0.50      0.10      0.17      1002
     weighted avg       1.00      0.21      0.34      1002



  'recall', 'true', average, warn_for)


                   precision    recall  f1-score   support

Intact-Rearranged       0.00      0.00      0.00         0
    Intact-Intact       1.00      0.18      0.31      1002

         accuracy                           0.18      1002
        macro avg       0.50      0.09      0.15      1002
     weighted avg       1.00      0.18      0.31      1002

                   precision    recall  f1-score   support

Intact-Rearranged       0.56      0.62      0.59       505
    Intact-Intact       0.57      0.51      0.54       496

         accuracy                           0.56      1001
        macro avg       0.56      0.56      0.56      1001
     weighted avg       0.56      0.56      0.56      1001



  'recall', 'true', average, warn_for)


                   precision    recall  f1-score   support

Intact-Rearranged       1.00      0.13      0.24      1001
    Intact-Intact       0.00      0.00      0.00         0

         accuracy                           0.13      1001
        macro avg       0.50      0.07      0.12      1001
     weighted avg       1.00      0.13      0.24      1001



  'recall', 'true', average, warn_for)


                   precision    recall  f1-score   support

Intact-Rearranged       1.00      0.15      0.26      1001
    Intact-Intact       0.00      0.00      0.00         0

         accuracy                           0.15      1001
        macro avg       0.50      0.07      0.13      1001
     weighted avg       1.00      0.15      0.26      1001

Mean score 0.24707508060801472


  'recall', 'true', average, warn_for)


                   precision    recall  f1-score   support

Intact-Rearranged       0.00      0.00      0.00         0
    Intact-Intact       1.00      0.28      0.44       943

         accuracy                           0.28       943
        macro avg       0.50      0.14      0.22       943
     weighted avg       1.00      0.28      0.44       943



  'recall', 'true', average, warn_for)


                   precision    recall  f1-score   support

Intact-Rearranged       0.00      0.00      0.00         0
    Intact-Intact       1.00      0.30      0.46       943

         accuracy                           0.30       943
        macro avg       0.50      0.15      0.23       943
     weighted avg       1.00      0.30      0.46       943

                   precision    recall  f1-score   support

Intact-Rearranged       0.42      0.64      0.51       329
    Intact-Intact       0.73      0.52      0.61       614

         accuracy                           0.57       943
        macro avg       0.58      0.58      0.56       943
     weighted avg       0.62      0.57      0.57       943



  'recall', 'true', average, warn_for)


                   precision    recall  f1-score   support

Intact-Rearranged       1.00      0.07      0.13       943
    Intact-Intact       0.00      0.00      0.00         0

         accuracy                           0.07       943
        macro avg       0.50      0.03      0.06       943
     weighted avg       1.00      0.07      0.13       943

                   precision    recall  f1-score   support

Intact-Rearranged       1.00      0.06      0.12       942
    Intact-Intact       0.00      0.00      0.00         0

         accuracy                           0.06       942
        macro avg       0.50      0.03      0.06       942
     weighted avg       1.00      0.06      0.12       942

Mean score 0.2551564438380468


  'recall', 'true', average, warn_for)


13603

In [0]:
#SMOTE
from imblearn.over_sampling import SMOTE
import numpy as np
sm = SMOTE(random_state=12)
x_train_en, y_train_en = sm.fit_sample(X_encoding, y_encoding_target)
x_train_en = pd.DataFrame(x_train_en)
y_train_en = pd.DataFrame(y_train_en)
df_train_en = (pd.concat([x_train_en, y_train_en])).reset_index()
#print (y_encoding_target.value_counts() , np.bincount(y_train_en))

x_train_re, y_train_re = sm.fit_sample(X_retri, y_retri_target)
x_train_re = pd.DataFrame(x_train_re)
y_train_re = pd.DataFrame(y_train_re)
df_train_re = (pd.concat([x_train_re, y_train_re])).reset_index()

# downsampling testing
rf = RandomForestClassifier(n_estimators=50)
wrong_ag_en1 = test(rf, df_train_en, x_train_en, y_train_en)
len(wrong_ag_en1)
#np.savetxt("wrong_encoding.csv", wrong_ag_en, delimiter=",", fmt='%d')
wrong_ag_re1 = test(rf, df_train_re, x_train_re, y_train_re)
#len(wrong_ag_re1)
#np.savetxt("wrong_retrieval.csv", wrong_ag_re, delimiter=",", fmt='%d')

IndexError: ignored