In [1]:
### Implementation of an XGBoost gradient-boosting classifier with hyper-parameter tuning.
### Data: all 7 folders.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn import svm, tree
import xgboost
import os
import csv
from sklearn.model_selection import train_test_split
#------------------
from scipy import fft, arange, signal

In [3]:
# Root data directory, given relative to the notebook's working directory.
folder_root = "../../data/"

In [4]:
def find_files(PATH):
    """
    Recursively collect the .csv files located under a directory.

    Parameters
    ----------
    PATH : str
        Directory to walk; every nested sub-directory is visited as well.

    Returns
    -------
    list of str
        Path of every file whose name ends with ``.csv``, with backslashes
        replaced by forward slashes. The list is empty when PATH does not
        exist or holds no matching file. Order follows ``os.walk``.
    """
    files = []
    for r, d, f in os.walk(PATH):
        for file in f:
            # Test the suffix rather than a substring so that names such as
            # "recording.csv.bak" or "my.csv_notes.txt" are not picked up.
            if file.endswith('.csv'):
                files.append(os.path.join(r, file).replace("\\", "/"))
    return files

In [5]:
def list_dir_files(root):
    """
    Collect the .csv files of every qualifying sub-folder directly under root.

    A sub-folder qualifies when it contains at least one .csv file (searched
    recursively via ``find_files``) and its name contains an underscore.
    Per the original author's note, the underscore rule filters out the test
    folder; empty folders are dropped by the first condition.

    Parameters
    ----------
    root : str
        Directory whose immediate sub-folders are scanned. A trailing slash
        is optional.

    Returns
    -------
    files : list of list of str
        One list of .csv paths per qualifying sub-folder.
    friendly_name : list of str
        The sub-folder names, in the same order as ``files``.
    """
    files = []
    friendly_name = []
    for x in os.listdir(root):
        # os.path.join works with or without a trailing separator on root;
        # plain concatenation silently produced a wrong path without one.
        subfolder = os.path.join(root, x)
        if not os.path.isdir(subfolder):
            continue
        onfo = find_files(subfolder)
        # Removes the test folder (no "_" in its name) and empty folders.
        if len(onfo) > 0 and len(x.split("_")) > 1:
            files.append(onfo)
            # x comes from os.listdir, so it is already a bare folder name.
            friendly_name.append(x)
    return files, friendly_name

In [6]:
# files: one list of .csv paths per sub-folder; friendly_name: the matching sub-folder names.
files, friendly_name = list_dir_files(folder_root)

In [7]:
def remove_meta_data(PATH):
    """
    Load one recording file, dropping its leading metadata line.

    The first line of the file is skipped, the second line is taken as the
    comma-separated header, and every following non-blank line is a data row.
    Only fields 3 to 7 (0-based, i.e. five columns) of the header and of each
    row are kept. Per the original author's note these are the 5 channels of
    the Emotiv headset: 2 from the frontal lobe, 1 from the parietal lobe and
    2 from the temporal lobe.

    Parameters
    ----------
    PATH : str
        Path of the file to read.

    Returns
    -------
    numpy.ndarray
        The kept fields of every data row. Values are the raw strings read
        from the file; no numeric conversion is performed.
    pandas.DataFrame
        The same rows, with the kept header fields as column names.

    Raises
    ------
    ValueError
        If the file has no header line after the metadata line.
    """
    # The context manager closes the handle even if parsing fails.
    # newline="" is the mode the csv module documents for reader input.
    with open(PATH, "rt", newline="") as handle:
        reader = csv.reader(handle, delimiter='\t')
        next(reader, None)  # discard the metadata line
        # Only the first tab-separated field of each line is used; blank
        # lines come back from csv.reader as empty lists and are skipped.
        lines = [line[0] for line in reader if line]

    if not lines:
        raise ValueError("no header row found after the metadata line in %r" % (PATH,))

    columns = lines[0].split(",")[3:8]
    row_data = [line.split(",")[3:8] for line in lines[1:]]
    dataframe = pd.DataFrame(row_data, columns=columns)
    return np.array(row_data), dataframe

In [8]:
def data_DF_dir(list_PATH):
    """
    Load every file of one folder through ``remove_meta_data``.

    Parameters
    ----------
    list_PATH : iterable of str
        Paths of the files to load.

    Returns
    -------
    numpy.ndarray
        The per-file arrays. When every file yields the same shape they are
        stacked into one regular array; otherwise a 1-D object array holding
        one array per file is returned.
    list of pandas.DataFrame
        One dataframe per file, in the order of ``list_PATH``.
    """
    raw_data = []
    dataframes = []
    for file in list_PATH:
        rd, dfob = remove_meta_data(file)
        raw_data.append(rd)
        dataframes.append(dfob)

    try:
        stacked = np.array(raw_data)
    except ValueError:
        # Files with different row counts form a ragged sequence, which
        # recent NumPy versions refuse to convert implicitly. Build the
        # object array explicitly, one slot per file.
        stacked = np.empty(len(raw_data), dtype=object)
        for position, file_array in enumerate(raw_data):
            stacked[position] = file_array
    return stacked, dataframes

In [22]:
def root_subfolder_file_data(root_list):
    """
    Load every file of every sub-folder through ``data_DF_dir``.

    Parameters
    ----------
    root_list : iterable of list of str
        One list of file paths per sub-folder.

    Returns
    -------
    numpy.ndarray
        The per-folder arrays returned by ``data_DF_dir``. When they all
        share one shape they are stacked into a regular array; otherwise a
        1-D object array with one entry per folder is returned.
    list of list of pandas.DataFrame
        ``result[i][j]`` is the dataframe of file ``j`` in folder ``i``.
    """
    root_df = []
    root_np = []
    for x in root_list:
        rnd, rdf = data_DF_dir(x)
        root_df.append(rdf)
        root_np.append(rnd)

    try:
        stacked = np.array(root_np)
    except ValueError:
        # Folders with different file counts (or files of different lengths)
        # form a ragged sequence, which recent NumPy versions refuse to
        # convert implicitly. Build the object array explicitly instead.
        stacked = np.empty(len(root_np), dtype=object)
        for position, folder_array in enumerate(root_np):
            stacked[position] = folder_array
    return stacked, root_df

In [23]:
# Only the nested list of dataframes is kept; the array return value is discarded.
_, raw_df = root_subfolder_file_data(files)

In [24]:
# Preview the first rows of the first file of the first folder.
raw_df[0][0].head() ## Anger 0th file

Unnamed: 0,EEG.AF3,EEG.T7,EEG.Pz,EEG.T8,EEG.AF4
0,4216.410156,4259.487305,4187.179688,4088.205078,4203.589844
1,4217.94873,4253.846191,4182.05127,4098.974121,4209.230957
2,4211.794922,4252.307617,4165.128418,4102.05127,4214.871582
3,4216.410156,4252.820313,4158.974121,4105.128418,4223.077148
4,4231.794922,4253.333496,4165.641113,4108.717773,4227.692383


In [25]:
def merge_all_data(dataframe, friendly_name, merge_name):
    """
    Merge all per-file dataframes into one dataframe with a label column.

    Parameters
    ----------
    dataframe : iterable of iterable of pandas.DataFrame
        One group of dataframes per folder.
    friendly_name : iterable of str
        Folder names, aligned with ``dataframe``. The text before the first
        underscore of each name is used as the label for that folder's rows.
    merge_name : hashable
        Name of the label column to add (e.g. ``"emotion"``).

    Returns
    -------
    pandas.DataFrame
        All rows of all input frames, in input order, with the extra
        ``merge_name`` column and a fresh 0..n-1 index. When there is no
        input frame at all, an empty frame holding only the ``merge_name``
        column is returned.

    Notes
    -----
    The input dataframes are left untouched; the label column is added to
    copies.
    """
    labelled = []
    for folder_frames, fn in zip(dataframe, friendly_name):
        label = fn.split("_")[0]
        for fil in folder_frames:
            # Copy first so the caller's frames do not silently gain a column.
            tagged = fil.copy()
            tagged[merge_name] = label
            labelled.append(tagged)

    if not labelled:
        return pd.DataFrame(columns=[merge_name])

    # One concat instead of repeated DataFrame.append: append was removed in
    # pandas 2.0 and re-copied the accumulated frame on every iteration.
    return pd.concat(labelled, ignore_index=True)

In [26]:
# Single frame holding every file's rows, labelled in an "emotion" column taken from the folder name prefix.
df = merge_all_data(raw_df, friendly_name, "emotion")

In [28]:
# Summary of the merged frame, used as a sanity check of the merge.
df.describe()

Unnamed: 0,EEG.AF3,EEG.T7,EEG.Pz,EEG.T8,EEG.AF4,emotion
count,119540.0,119540.0,119540.0,119540.0,119540.0,119540
unique,5241.0,4673.0,4465.0,4770.0,4889.0,7
top,4223.589844,4355.384766,4146.153809,4140.512695,4192.820313,8
freq,467.0,385.0,414.0,640.0,508.0,23807


In [None]:
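### Data all merged: 119540 rows, 7 distinct emotion labels.
### NOTE: describe() reports count/unique/top/freq rather than mean/std, so the EEG columns are still strings and need a numeric cast before modelling.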