In [1]:
import csv
import pandas as pd
from scipy import stats
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, f1_score,classification_report, roc_auc_score
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neural_network import MLPClassifier, MLPRegressor

In [2]:
xl_file = pd.read_csv('rawdata.csv')
xl_file = xl_file.replace(np.nan, 0)
df = pd.DataFrame(data = xl_file)


In [3]:
## Establishing dataset for a 2 class Naive Bayesian Classifier
# Cons vs Diss comparison
reldata=df.filter(items =['Index','Stimulus','F7', 'F8'])
reldata.loc[reldata["Stimulus"] == 'C Dis1.mp3', 'Stimulus'] = 0
reldata.loc[reldata["Stimulus"] == 'C Dis2.mp3', 'Stimulus'] = 0
reldata.loc[reldata["Stimulus"] == 'D Dis1.mp3', 'Stimulus'] = 0
reldata.loc[reldata["Stimulus"] == 'D Dis2.mp3', 'Stimulus'] = 0
reldata.loc[reldata["Stimulus"] == 'C Cons1.mp3', 'Stimulus'] = 1
reldata.loc[reldata["Stimulus"] == 'C Cons2.mp3', 'Stimulus'] = 1
reldata.loc[reldata["Stimulus"] == 'D Cons1.mp3', 'Stimulus'] = 1
reldata.loc[reldata["Stimulus"] == 'D Cons2.mp3', 'Stimulus'] = 1
reldata.loc[reldata["Stimulus"] == 'C Dis1.mp3 (interval)', 'Stimulus'] = 0
reldata.loc[reldata["Stimulus"] == 'C Dis2.mp3 (interval)', 'Stimulus'] = 0
reldata.loc[reldata["Stimulus"] == 'D Dis1.mp3 (interval)', 'Stimulus'] = 0
reldata.loc[reldata["Stimulus"] == 'D Dis2.mp3 (interval)', 'Stimulus'] = 0
reldata.loc[reldata["Stimulus"] == 'C Cons1.mp3 (interval)', 'Stimulus'] = 1
reldata.loc[reldata["Stimulus"] == 'C Cons2.mp3 (interval)', 'Stimulus'] = 1
reldata.loc[reldata["Stimulus"] == 'D Cons1.mp3 (interval)', 'Stimulus'] = 1
reldata.loc[reldata["Stimulus"] == 'D Cons2.mp3 (interval)', 'Stimulus'] = 1



In [4]:
def remove_outlier(df_in, col_name):
    q1 = df_in[col_name].quantile(0.25)
    q3 = df_in[col_name].quantile(0.75)
    iqr = q3-q1 #Interquartile range
    fence_low  = q1-1.5*iqr
    fence_high = q3+1.5*iqr
    df_out = df_in.loc[(df_in[col_name] > fence_low) & (df_in[col_name] < fence_high)]
    return df_out

In [9]:
reldata1 = remove_outlier(reldata, 'F7')
reldata = remove_outlier(reldata1, 'F8')


In [10]:
reldata.describe()

Unnamed: 0,Index,Stimulus,F7,F8
count,433734.0,433734.0,433734.0,433734.0
mean,12731.789779,0.506345,4071.267227,4418.226201
std,7375.304221,0.49996,247.741379,437.487079
min,2.0,0.0,3314.358974,2719.487179
25%,6344.0,0.0,3881.025641,3951.794872
50%,12707.0,1.0,4090.769231,4692.820513
75%,19088.0,1.0,4257.435897,4784.615385
max,27393.0,1.0,4823.076923,6033.846154


In [11]:
train, test = train_test_split(reldata, test_size = 0.2)
x = train[['F7', 'F8']]
y = train['Stimulus']
xtest = test[['F7', 'F8']]
ytest = test['Stimulus']

ymat = ytest.values.reshape(-1,1)
xmat = x.values

gnb = GaussianNB()
pred = gnb.fit(x, y).predict(xtest)
print(accuracy_score(ymat, pred))
print(precision_score(ymat,pred))
print(recall_score(ymat, pred))
print(f1_score(ymat, pred))
print(confusion_matrix(ymat, pred))
print(roc_auc_score(ymat, pred))

0.5218624275191073
0.5212128394392903
0.6999749903371757
0.5975099707911616
[[14483 28281]
 [13196 30787]]
0.5193238528526212


In [12]:
mlp2 = MLPClassifier(hidden_layer_sizes= (2, 4, 2), activation = 'relu', solver ='adam', max_iter =1000)
mlp2.fit(x,y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(2, 4, 2), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [13]:
predict_train2 = mlp2.predict(x)
predict_test = mlp2.predict(xtest)
print(classification_report(ymat, predict_test))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00     42764
           1       0.51      1.00      0.67     43983

   micro avg       0.51      0.51      0.51     86747
   macro avg       0.25      0.50      0.34     86747
weighted avg       0.26      0.51      0.34     86747



  'precision', 'predicted', average, warn_for)


In [14]:
## Establishing a data set for a 4 class Naive Bayesian Classifier 
## C cons vs D cons vs C diss vs D diss
reldata4=df.filter(items =['Index','Stimulus','F7', 'F8'])



In [15]:
reldata41 = remove_outlier(reldata4, 'F7')
reldata4 = remove_outlier(reldata41, 'F7')

In [16]:

reldata4.loc[reldata4["Stimulus"] == 'C Dis1.mp3', 'Stimulus'] = 0
reldata4.loc[reldata4["Stimulus"] == 'C Dis2.mp3', 'Stimulus'] = 0
reldata4.loc[reldata4["Stimulus"] == 'D Dis1.mp3', 'Stimulus'] = 1
reldata4.loc[reldata4["Stimulus"] == 'D Dis2.mp3', 'Stimulus'] = 1
reldata4.loc[reldata4["Stimulus"] == 'C Cons1.mp3', 'Stimulus'] = 2
reldata4.loc[reldata4["Stimulus"] == 'C Cons2.mp3', 'Stimulus'] = 2
reldata4.loc[reldata4["Stimulus"] == 'D Cons1.mp3', 'Stimulus'] = 3
reldata4.loc[reldata4["Stimulus"] == 'D Cons2.mp3', 'Stimulus'] = 3
reldata4.loc[reldata4["Stimulus"] == 'C Dis1.mp3 (interval)', 'Stimulus'] = 0
reldata4.loc[reldata4["Stimulus"] == 'C Dis2.mp3 (interval)', 'Stimulus'] = 0
reldata4.loc[reldata4["Stimulus"] == 'D Dis1.mp3 (interval)', 'Stimulus'] = 1
reldata4.loc[reldata4["Stimulus"] == 'D Dis2.mp3 (interval)', 'Stimulus'] = 1
reldata4.loc[reldata4["Stimulus"] == 'C Cons1.mp3 (interval)', 'Stimulus'] = 2
reldata4.loc[reldata4["Stimulus"] == 'C Cons2.mp3 (interval)', 'Stimulus'] = 2
reldata4.loc[reldata4["Stimulus"] == 'D Cons1.mp3 (interval)', 'Stimulus'] = 3
reldata4.loc[reldata4["Stimulus"] == 'D Cons2.mp3 (interval)', 'Stimulus'] = 3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [17]:
train4, test4 = train_test_split(reldata4, test_size = 0.2)
x4 = train4[['F7', 'F8']]
y4 = train4['Stimulus']
xtest4 = test4[['F7', 'F8']]
ytest4 = test4['Stimulus']
ymat4 = ytest4.values.reshape(-1,1)
xmat = x4.values

clf = MultinomialNB(fit_prior = False)
pred4 = gnb.fit(x4, y4).predict(xtest4)
print(accuracy_score(ymat4, pred4))
print(precision_score(ymat4,pred4, average = 'macro'))
print(recall_score(ymat4, pred4, average = 'macro'))
print(f1_score(ymat4, pred4, average = 'macro'))
print(confusion_matrix(ymat4, pred4))


0.2680095359845213
0.29211326936359855
0.265840476008559
0.22680772670070487
[[ 2760   700 11339  7152]
 [ 2149  1058 10851  6993]
 [ 1777   719 12681  6765]
 [ 1837   703 12573  6772]]


In [18]:
mlp4 = MLPClassifier(hidden_layer_sizes= (2, 4, 4), activation = 'relu', solver ='adam', max_iter =1000)
mlp4.fit(x4,y4)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(2, 4, 4), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [19]:
predict_train4 = mlp4.predict(x4)
predict_test4 = mlp4.predict(xtest4)
print(classification_report(ymat4, predict_test4))

              precision    recall  f1-score   support

           0       0.25      1.00      0.40     21951
           1       0.00      0.00      0.00     21051
           2       0.00      0.00      0.00     21942
           3       0.00      0.00      0.00     21885

   micro avg       0.25      0.25      0.25     86829
   macro avg       0.06      0.25      0.10     86829
weighted avg       0.06      0.25      0.10     86829



  'precision', 'predicted', average, warn_for)


In [20]:
## Establishing a data set for a 8 class Naive Bayesian Classifier
## C cons vs D cons vs C diss vs D diss
reldata8=df.filter(items =['Index','Stimulus','F7', 'F8'])

reldata8.loc[reldata8["Stimulus"] == 'C Dis1.mp3', 'Stimulus'] = 0
reldata8.loc[reldata8["Stimulus"] == 'C Dis2.mp3', 'Stimulus'] = 1
reldata8.loc[reldata8["Stimulus"] == 'D Dis1.mp3', 'Stimulus'] = 2
reldata8.loc[reldata8["Stimulus"] == 'D Dis2.mp3', 'Stimulus'] = 3
reldata8.loc[reldata8["Stimulus"] == 'C Cons1.mp3', 'Stimulus'] = 4
reldata8.loc[reldata8["Stimulus"] == 'C Cons2.mp3', 'Stimulus'] = 5
reldata8.loc[reldata8["Stimulus"] == 'D Cons1.mp3', 'Stimulus'] = 6
reldata8.loc[reldata8["Stimulus"] == 'D Cons2.mp3', 'Stimulus'] = 7
reldata8.loc[reldata8["Stimulus"] == 'C Dis1.mp3 (interval)', 'Stimulus'] = 0
reldata8.loc[reldata8["Stimulus"] == 'C Dis2.mp3 (interval)', 'Stimulus'] = 1
reldata8.loc[reldata8["Stimulus"] == 'D Dis1.mp3 (interval)', 'Stimulus'] = 2
reldata8.loc[reldata8["Stimulus"] == 'D Dis2.mp3 (interval)', 'Stimulus'] = 3
reldata8.loc[reldata8["Stimulus"] == 'C Cons1.mp3 (interval)', 'Stimulus'] = 4
reldata8.loc[reldata8["Stimulus"] == 'C Cons2.mp3 (interval)', 'Stimulus'] = 5
reldata8.loc[reldata8["Stimulus"] == 'D Cons1.mp3 (interval)', 'Stimulus'] = 6
reldata8.loc[reldata8["Stimulus"] == 'D Cons2.mp3 (interval)', 'Stimulus'] = 7

In [23]:
reldata8 = remove_outlier(reldata8, 'F7')
reldata8 = remove_outlier(reldata8, 'F8')

In [24]:
train8, test8 = train_test_split(reldata8, test_size = 0.2)
x8 = train8[['F7', 'F8']]
y8 = train8['Stimulus']
xtest8 = test8[['F7', 'F8']]
ytest8 = test8['Stimulus']
ymat8 = ytest8.values.reshape(-1,1)
xmat8 = x8.values

clf = MultinomialNB(fit_prior = False)
pred8 = gnb.fit(x8, y8).predict(xtest8)
print(accuracy_score(ymat8, pred8))
print(precision_score(ymat8,pred8, average = 'macro'))
print(recall_score(ymat8, pred8, average = 'macro'))
print(f1_score(ymat8, pred8, average = 'macro'))
print(confusion_matrix(ymat8, pred8))

0.13878289739126426
0.12040327579581564
0.13650855819263463
0.09592180393482838
[[ 666  749   14    0 4779 4146  262  586]
 [ 583 1213   26    0 3972 4014  299  494]
 [ 602  819    8    0 4132 4570  415  484]
 [ 573  849    9    0 3893 3970  245  374]
 [ 652  424   14    0 4639 4292  363  579]
 [ 691  891   18    0 4485 4530  219  369]
 [ 790  607   13    0 4316 4246  357  553]
 [ 669  669   13    0 4393 4371  212  626]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [147]:
mlp8 = MLPClassifier(hidden_layer_sizes= (2, 8, 8), activation = 'relu', solver ='adam', max_iter =1000)
mlp8.fit(x8,y8)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(2, 8, 8), learning_rate='constant',
       learning_rate_init=0.001, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [148]:
predict_train8 = mlp8.predict(x8)
predict_test8 = mlp8.predict(xtest8)
print(classification_report(ymat8, predict_test8))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00     12507
           1       0.11      0.00      0.00     12599
           2       0.00      0.00      0.00     12315
           3       0.00      0.00      0.00     11185
           4       0.00      0.00      0.00     12546
           5       0.13      0.98      0.22     12394
           6       0.18      0.03      0.05     12589
           7       0.22      0.00      0.00     12614

   micro avg       0.13      0.13      0.13     98749
   macro avg       0.08      0.13      0.03     98749
weighted avg       0.08      0.13      0.03     98749



  'precision', 'predicted', average, warn_for)
