In [40]:
# Data Processing
import pandas as pd
import numpy as np

# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from scipy.stats import randint

In [41]:
## Getting data ##
learned_data_path = '../processed_EEG_data\clean_learned_JoshIrby.csv'
not_learned_data_path = '../processed_EEG_data\clean_notLearned_JoshIrby.csv'

learned_data = pd.read_csv(learned_data_path)
not_learned_data = pd.read_csv(not_learned_data_path)

In [50]:
# checking how much data for each category
print(f"original learned shape: {learned_data.shape}")
print(f"original not_learned shape: {not_learned_data.shape}")

# make them equal to remove bias
learned_data = learned_data.sample(frac=1, random_state=42).reset_index(drop=True)
not_learned_data = not_learned_data.sample(frac=1, random_state=42).reset_index(drop=True)

# get length 
learned_length = learned_data.shape[0]
not_learned_length = not_learned_data.shape[0]

new_learned_data = learned_data.drop(index=range(not_learned_length, learned_length))
new_not_learned_data = not_learned_data.drop(index=range(learned_length, not_learned_length))

# print results
print(f"new learned shape: {new_learned_data.shape}")
print(f"new not_learned shape: {not_learned_data.shape}")

original learned shape: (140, 2456)
original not_learned shape: (20, 2456)
new learned shape: (20, 2456)
new not_learned shape: (20, 2456)


In [None]:
# convert pd to a np array for the feature matrix X
X_learned = new_learned_data.values  
X_not_learned = not_learned_data.values

# make the label vector y
'''
to indicate which epochs go to which condition
y vector label should be as long as x matrix is long

zeros = learned
ones = not learned
'''
y_learned = np.zeros(X_learned.shape[0])
y_not_learned = np.ones(X_not_learned.shape[0])

# combine the data and labels
# learned + not learned 
# learned labels + not learned labels
X = np.concatenate([X_learned, X_not_learned], axis=0)
y = np.concatenate([y_learned, y_not_learned], axis=0)

new learned shape: (20, 2456)
new not_learned shape: (20, 2456)


In [48]:
## MODEL ##

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Training
rf = RandomForestClassifier()
rf.fit(X_train, y_train)

# test it
y_pred = rf.predict(X_test)

# get the accuracy
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix: \n", cm)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Confusion Matrix: 
 [[1 1]
 [6 0]]
Accuracy: 0.125
