# Emotion classification on DEAP EEG data
Data source: https://www.eecs.qmul.ac.uk/mmv/datasets/deap/

Preprocessed source: https://github.com/pratyakshajha/emotion-recognition-by-deap-dataset/tree/master/Scripts 

## Imports

In [4]:
import sys
sys.path.append("../itershap")

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

from itershap import IterSHAP

## Get preprocessed DEAP data

In [8]:
# Data preprocessed using: SOURCE
# Preprocessed tabular data stored in data/deap folder

def load_deap_data(PERC_DP_USED, filepath="../data/deap/SOURCE.csv"):
    """Load the preprocessed DEAP table and optionally subsample its rows.

    The CSV is read as one header line followed by rows of comma-separated
    numeric values; the last value of each row is treated as the label.

    Args:
        PERC_DP_USED: Fraction of the datapoints to keep. Values below 1.0
            trigger a random subsample with a fixed seed (repeatable across
            runs); any other value keeps every row.
        filepath: Location of the CSV file. Defaults to the path used by
            this notebook.

    Returns:
        A tuple ``(X, y)``. ``X`` is a pandas DataFrame holding every column
        except the last one. ``y`` is a NumPy array with the last column
        shifted down by one and rounded to integers.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank row contains a non-numeric value.
    """
    features = []
    labels = []

    # The context manager guarantees the handle is closed, even when a row
    # fails to parse.
    with open(filepath) as f:
        f.readline()  # skip the header row holding the attribute names
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            values = [float(v) for v in line.split(',')]
            features.append(values[:-1])
            labels.append(values[-1])

    X = pd.DataFrame(np.asarray(features))
    y = np.asarray([round(label - 1) for label in labels])

    # Randomly select a portion of the datapoints, based on PERC_DP_USED parameter
    if PERC_DP_USED < 1.0:
        # If PERC_DP_USED == 1.0, then all DP will be used for the model
        X, _, y, _ = train_test_split(X, y, train_size=PERC_DP_USED, random_state=20)

    return X, y

# load_deap_data(1.00)

## Run IterSHAP

In [None]:
# Fraction of the datapoints passed to load_deap_data for this run
PERC_DATA_USED = 0.01

# Load data from the data folder
X, y = load_deap_data(PERC_DATA_USED)

# Keep an independent copy to test model performance without feature selection.
# pd.DataFrame(X) does not copy the underlying data by default, so an explicit
# .copy() is used to make sure later operations on X cannot affect the baseline.
X_without_fs = X.copy()

# Check the current shape of the dataset
print(X.shape)

# Create and fit IterSHAP using a RandomForestClassifier (default)
itershap_fs = IterSHAP()
itershap_fs.fit(X, y)

# Transform the input data to only include selected features and print its shape
X = itershap_fs.transform()
print(X.shape)

## Run model with and without feature selection

#### Without feature selection

In [None]:
# Hold out 25% of the unreduced data (all original features) for evaluation
X_train, X_test, y_train, y_test = train_test_split(X_without_fs, y, test_size=0.25)

# Baseline: random forest trained on every feature
clf = RandomForestClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# accuracy_score is documented as (y_true, y_pred); pass the arguments in that order
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

#### With feature selection

In [None]:
# Hold out 25% of the feature-selected data for evaluation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

# Random forest trained only on the features kept by IterSHAP
clf = RandomForestClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# accuracy_score is documented as (y_true, y_pred); pass the arguments in that order
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)