In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

%matplotlib inline

# Single fixed seed, passed as random_state to the sampling and estimator calls in later cells.
seed = 1356

# Load the feature table used by every later cell.
# NOTE(review): unpickling can execute arbitrary code - only load this file from a trusted source.
raw_data = pd.read_pickle("domestic_audio_features.pkl")

In [2]:
# Parse the tab-separated metadata file into three parallel tuples (one per column).
# Only the second column (labels) is used by later cells; the third is discarded.
# The context manager closes the file handle, which the bare open(...).readlines() call leaked.
with open('DCASE2018-task5-dev.meta/DCASE2018-task5-dev/meta.txt') as meta_file:
    afs, labels, _ = zip(*[line.rstrip('\n').split('\t') for line in meta_file])

In [3]:
# Build label -> integer and integer -> label lookups (codes start at 1).
# The labels are sorted before numbering: iterating a bare set of strings can yield a
# different order in each interpreter session, which would silently reassign the codes.
# NOTE(review): confirm these codes agree with the integer labels stored in raw_data.
sces = set(labels)
sce_int_map = {sce: code for code, sce in enumerate(sorted(sces), start=1)}
int_sce_map = {code: sce for sce, code in sce_int_map.items()}

In [4]:
# Preview the first rows of the loaded feature table.
raw_data.head()

Unnamed: 0,label,mfcc_01_mean,mfcc_01_std,mfcc_02_mean,mfcc_02_std,mfcc_03_mean,mfcc_03_std,mfcc_04_mean,mfcc_04_std,mfcc_05_mean,...,rmse_mean,rmse_std,rolloff_mean,rolloff_std,spec_centoid_mean,spec_centoid_std,spec_flux_mean,spec_flux_std,zcr_mean,zcr_std
0,9,-336.051919,72.676704,52.751568,50.311581,20.932058,15.137371,22.713074,10.642034,13.676562,...,0.051768,0.039705,3600.289537,1720.669191,1469.152555,883.046342,1.055207,1.524605,0.006329,0.020022
1,9,-336.854385,77.127879,25.032802,31.575797,11.374043,16.556974,13.848688,8.344629,11.24008,...,0.046316,0.015529,4702.051717,1401.513136,1892.227442,923.234354,1.027018,1.234435,0.005552,0.025613
2,9,-357.847449,64.125447,30.350622,20.617674,12.364442,8.837606,12.060213,11.345274,9.299338,...,0.043852,0.000479,4420.377396,1346.985001,1676.309097,773.329444,0.879802,0.785668,0.000212,0.001167
3,9,-314.862922,66.357897,8.575267,25.946491,10.373437,10.781843,13.793221,7.895745,9.271161,...,0.044296,0.001244,5490.415335,1152.930682,2336.224332,858.833091,1.201696,1.554686,0.00287,0.010235
4,9,-335.962007,70.711474,54.434298,38.43189,15.171099,13.887398,14.233967,11.697274,10.299907,...,0.047941,0.016913,3716.328874,1350.669698,1438.245156,664.098987,1.07201,1.322863,0.004752,0.012924


In [9]:
# Downsample so that each label is equally represented: every class keeps as many rows
# as the smallest class has.
label_groups = raw_data.groupby('label')

# Computed once: the smallest class size is the same for every group. The original looked
# it up inside the per-group lambda, through a name that was being rebound on that same line.
min_class_size = label_groups.size().min()

# Sample each class with a fresh generator seeded by `seed` and renumber its rows from 0.
# Iterating the groups (instead of groupby.apply) keeps the 'label' column in every piece
# regardless of pandas version; pandas 2.2 deprecates apply operating on grouping columns.
# The result keeps the (label, row number) MultiIndex that apply used to produce.
balanced_data = pd.concat(
    {
        label: group.sample(min_class_size, random_state=seed).reset_index(drop=True)
        for label, group in label_groups
    },
    names=['label', None],
)

y = balanced_data['label']
X = balanced_data.drop('label', axis=1)

In [10]:
# Unbalanced variant: every row of the raw table, split into features and target.
X_unbal = raw_data.drop(columns='label')
y_unbal = raw_data['label']

In [5]:
# split data into stratified folds for cross validation
from sklearn.model_selection import StratifiedKFold

# random_state only has an effect when shuffle=True. Recent scikit-learn releases raise a
# ValueError when it is set while shuffle keeps its default of False. The folds were never
# shuffled here, so dropping the argument keeps exactly the same splits.
# To randomise the folds instead: StratifiedKFold(n_splits=4, shuffle=True, random_state=seed)
skf = StratifiedKFold(n_splits=4)

In [6]:
# create helper function for classifier
from sklearn import preprocessing
from sklearn.metrics import f1_score

# for each fold,
# preprocess data into std, minmax and raw
# train clf for each preprocessing method
# get the score for each preprocessing method
# display avg f-score for each preprocessing method

def cross_val(X, y, clf, skf):
    """Cross-validate ``clf`` under three feature scalings and print macro F1 scores.

    For every fold yielded by ``skf.split(X, y)`` the classifier is refit and scored
    three times: on the raw features, on standardised features, and on min-max scaled
    features. Each scaler is fitted on the training part of the fold only and then
    applied to both parts. Per-fold scores are printed as they are computed, followed
    by the average of each variant over all folds.

    Parameters
    ----------
    X : object exposing ``.values``
        Feature table; ``X.values`` is indexed with the fold index arrays.
    y : object exposing ``.values``
        Target labels, indexed the same way as ``X``.
    clf : estimator with ``fit(X, y)`` and ``predict(X)``
        Refit in place for every fold and every scaling variant.
    skf : splitter with ``split(X, y)``
        Must yield ``(train_indices, test_indices)`` pairs.

    Returns
    -------
    None
        Results are only printed.

    Raises
    ------
    ValueError
        If ``skf.split`` yields no folds, since no average can be computed.
    """
    # (label used in the printed lines, scaler class or None for the raw features)
    variants = (
        ("No pre-processing", None),
        ("Standard pre-processing", preprocessing.StandardScaler),
        ("Min-max pre-processing", preprocessing.MinMaxScaler),
    )
    totals = {label: 0 for label, _ in variants}
    n_folds = 0

    for i, (train, test) in enumerate(skf.split(X, y)):
        n_folds += 1
        X_train, X_test = X.values[train], X.values[test]
        y_train, y_test = y.values[train], y.values[test]

        for label, scaler_cls in variants:
            if scaler_cls is None:
                X_fit, X_eval = X_train, X_test
            else:
                # Fit on the training rows only so no test statistics leak into the scaler.
                scaler = scaler_cls().fit(X_train)
                X_fit, X_eval = scaler.transform(X_train), scaler.transform(X_test)

            clf.fit(X_fit, y_train)
            y_pred = clf.predict(X_eval)
            score = f1_score(y_test, y_pred, average='macro')
            print("{0}, fold {1}: {2:.5f}".format(label, i+1, score))
            totals[label] += score
        print()

    if n_folds == 0:
        raise ValueError("skf.split(X, y) produced no folds; cannot average scores")

    # Average over the folds actually produced; the divisor used to be a hard-coded 4,
    # which is wrong for any splitter that does not yield exactly four folds.
    print("======================================================")
    for label, _ in variants:
        print("{0} average: {1:.5f}".format(label, totals[label] / n_folds))
    print("======================================================")

In [11]:
# knn: 1-nearest-neighbour classifier, scored under all three scalings
from sklearn.neighbors import KNeighborsClassifier

neigh = KNeighborsClassifier(n_neighbors=1)
cross_val(X, y, neigh, skf)

No pre-processing, fold 1: 0.58228
Standard pre-processing, fold 1: 0.84789
Min-max pre-processing, fold 1: 0.84655

No pre-processing, fold 2: 0.60467
Standard pre-processing, fold 2: 0.85608
Min-max pre-processing, fold 2: 0.86045

No pre-processing, fold 3: 0.57359
Standard pre-processing, fold 3: 0.84712
Min-max pre-processing, fold 3: 0.84899

No pre-processing, fold 4: 0.60140
Standard pre-processing, fold 4: 0.84501
Min-max pre-processing, fold 4: 0.84664

No pre-processing average: 0.59049
Standard pre-processing average: 0.84902
Min-max pre-processing average: 0.85066


In [24]:
# knn: same evaluation as the 1-neighbour run, now voting over 3 neighbours
from sklearn.neighbors import KNeighborsClassifier

neigh = KNeighborsClassifier(n_neighbors=3)
cross_val(X, y, neigh, skf)

No pre-processing, fold 1: 0.58448
Standard pre-processing, fold 1: 0.84705
Min-max pre-processing, fold 1: 0.84371

No pre-processing, fold 2: 0.61776
Standard pre-processing, fold 2: 0.86150
Min-max pre-processing, fold 2: 0.86267

No pre-processing, fold 3: 0.59800
Standard pre-processing, fold 3: 0.84717
Min-max pre-processing, fold 3: 0.84363

No pre-processing, fold 4: 0.59995
Standard pre-processing, fold 4: 0.84813
Min-max pre-processing, fold 4: 0.84842

No pre-processing average: 0.60005
Standard pre-processing average: 0.85096
Min-max pre-processing average: 0.84961


In [25]:
# logistic regression: sweep the inverse regularisation strength C
from sklearn.linear_model import LogisticRegression

# small C means underfitting (stronger regularisation)
C_values = [0.001, 0.01, 0.1, 1, 10, 100]

for c in C_values:
    print("C value: {}".format(c))
    logre = LogisticRegression(
        C=c,
        random_state=seed,
        solver='sag',
        max_iter=500,
    )
    cross_val(X, y, logre, skf)
    print()

C value: 0.001




No pre-processing, fold 1: 0.71411
Standard pre-processing, fold 1: 0.71451
Min-max pre-processing, fold 1: 0.49160

No pre-processing, fold 2: 0.72903
Standard pre-processing, fold 2: 0.74080
Min-max pre-processing, fold 2: 0.50266

No pre-processing, fold 3: 0.72104
Standard pre-processing, fold 3: 0.71539
Min-max pre-processing, fold 3: 0.50325

No pre-processing, fold 4: 0.72924
Standard pre-processing, fold 4: 0.72286
Min-max pre-processing, fold 4: 0.49356

No pre-processing average: 0.72336
Standard pre-processing average: 0.72339
Min-max pre-processing average: 0.49777

C value: 0.01
No pre-processing, fold 1: 0.71516
Standard pre-processing, fold 1: 0.78030
Min-max pre-processing, fold 1: 0.64950

No pre-processing, fold 2: 0.73073
Standard pre-processing, fold 2: 0.80138
Min-max pre-processing, fold 2: 0.69101

No pre-processing, fold 3: 0.72274
Standard pre-processing, fold 3: 0.78774
Min-max pre-processing, fold 3: 0.66102

No pre-processing, fold 4: 0.72878
Standard pre-pr

In [26]:
# svm kernel: rbf
from sklearn.svm import SVC

poly_deg = [1, 2, 3, 4]
gamma_val = [0.001, 0.01, 0.1, 1, 10, 100]
C_values = [0.001, 0.01, 0.1, 1, 10, 100]
kernels = ['rbf']

# Sweep the hyperparameters relevant to each requested kernel and cross-validate every
# combination. The branches are mutually exclusive, hence elif.
for kernel in kernels:
    print("Kernel: {}".format(kernel))
    if kernel == 'linear':
        for c in C_values:
            svm = SVC(kernel=kernel, C=c, random_state=seed, max_iter=500)
            print("C value: {}".format(c))
            cross_val(X, y, svm, skf)
            print()
    elif kernel == 'poly':
        for c in C_values:
            for deg in poly_deg:
                for gamma in gamma_val:
                    # Uses the loop variable `deg`; this branch previously referenced an
                    # undefined name `degree` and raised NameError when 'poly' was selected.
                    svm = SVC(kernel=kernel, C=c, degree=deg, gamma=gamma, random_state=seed, max_iter=500)
                    print("C: {0}, Degrees: {1}, Gamma: {2}".format(c, deg, gamma))
                    cross_val(X, y, svm, skf)
                    print()
    elif kernel == 'rbf':
        for c in C_values:
            for gamma in gamma_val:
                svm = SVC(kernel=kernel, C=c, gamma=gamma, random_state=seed, max_iter=500)
                print("C: {0}, Gamma: {1}".format(c, gamma))
                cross_val(X, y, svm, skf)
                print()

Kernel: rbf
C: 0.001, Gamma: 0.001




No pre-processing, fold 1: 0.48975
Standard pre-processing, fold 1: 0.48491
Min-max pre-processing, fold 1: 0.52561

No pre-processing, fold 2: 0.50226
Standard pre-processing, fold 2: 0.43799
Min-max pre-processing, fold 2: 0.55838

No pre-processing, fold 3: 0.46852
Standard pre-processing, fold 3: 0.49048
Min-max pre-processing, fold 3: 0.52680

No pre-processing, fold 4: 0.49224
Standard pre-processing, fold 4: 0.50504
Min-max pre-processing, fold 4: 0.52948

No pre-processing average: 0.48819
Standard pre-processing average: 0.47961
Min-max pre-processing average: 0.53507

C: 0.001, Gamma: 0.01
No pre-processing, fold 1: 0.54721
Standard pre-processing, fold 1: 0.61288
Min-max pre-processing, fold 1: 0.52561

No pre-processing, fold 2: 0.56737
Standard pre-processing, fold 2: 0.62726
Min-max pre-processing, fold 2: 0.56479

No pre-processing, fold 3: 0.54373
Standard pre-processing, fold 3: 0.60596
Min-max pre-processing, fold 3: 0.51045

No pre-processing, fold 4: 0.56218
Standar

  'precision', 'predicted', average, warn_for)


No pre-processing, fold 1: 0.12335
Standard pre-processing, fold 1: 0.58021
Min-max pre-processing, fold 1: 0.69750

No pre-processing, fold 2: 0.12174
Standard pre-processing, fold 2: 0.60846
Min-max pre-processing, fold 2: 0.73652

No pre-processing, fold 3: 0.12403
Standard pre-processing, fold 3: 0.59768
Min-max pre-processing, fold 3: 0.71001

No pre-processing, fold 4: 0.12072
Standard pre-processing, fold 4: 0.58125
Min-max pre-processing, fold 4: 0.71921

No pre-processing average: 0.12246
Standard pre-processing average: 0.59190
Min-max pre-processing average: 0.71581

C: 0.001, Gamma: 10
No pre-processing, fold 1: 0.02763
Standard pre-processing, fold 1: 0.72425
Min-max pre-processing, fold 1: 0.68270

No pre-processing, fold 2: 0.02935
Standard pre-processing, fold 2: 0.76817
Min-max pre-processing, fold 2: 0.71348

No pre-processing, fold 3: 0.02939
Standard pre-processing, fold 3: 0.72503
Min-max pre-processing, fold 3: 0.70021

No pre-processing, fold 4: 0.02942
Standard 

No pre-processing, fold 4: 0.41256
Standard pre-processing, fold 4: 0.76084
Min-max pre-processing, fold 4: 0.72817

No pre-processing average: 0.41671
Standard pre-processing average: 0.76392
Min-max pre-processing average: 0.72039

C: 0.1, Gamma: 1
No pre-processing, fold 1: 0.12355
Standard pre-processing, fold 1: 0.58188
Min-max pre-processing, fold 1: 0.76451

No pre-processing, fold 2: 0.12174
Standard pre-processing, fold 2: 0.60819
Min-max pre-processing, fold 2: 0.79305

No pre-processing, fold 3: 0.12403
Standard pre-processing, fold 3: 0.59884
Min-max pre-processing, fold 3: 0.79002

No pre-processing, fold 4: 0.12072
Standard pre-processing, fold 4: 0.57986
Min-max pre-processing, fold 4: 0.77996

No pre-processing average: 0.12251
Standard pre-processing average: 0.59219
Min-max pre-processing average: 0.78189

C: 0.1, Gamma: 10
No pre-processing, fold 1: 0.02763
Standard pre-processing, fold 1: 0.66881
Min-max pre-processing, fold 1: 0.68179

No pre-processing, fold 2: 0.

No pre-processing, fold 1: 0.10809
Standard pre-processing, fold 1: 0.88242
Min-max pre-processing, fold 1: 0.84049

No pre-processing, fold 2: 0.10647
Standard pre-processing, fold 2: 0.89463
Min-max pre-processing, fold 2: 0.85632

No pre-processing, fold 3: 0.13346
Standard pre-processing, fold 3: 0.88906
Min-max pre-processing, fold 3: 0.85108

No pre-processing, fold 4: 0.10651
Standard pre-processing, fold 4: 0.88094
Min-max pre-processing, fold 4: 0.84702

No pre-processing average: 0.11363
Standard pre-processing average: 0.88676
Min-max pre-processing average: 0.84872

C: 10, Gamma: 1
No pre-processing, fold 1: 0.02955
Standard pre-processing, fold 1: 0.70600
Min-max pre-processing, fold 1: 0.87803

No pre-processing, fold 2: 0.03232
Standard pre-processing, fold 2: 0.72779
Min-max pre-processing, fold 2: 0.89122

No pre-processing, fold 3: 0.03841
Standard pre-processing, fold 3: 0.71157
Min-max pre-processing, fold 3: 0.88751

No pre-processing, fold 4: 0.03307
Standard pre-

In [27]:
# svm kernel:linear
from sklearn.svm import SVC

poly_deg = [1, 2, 3, 4]
gamma_val = [0.001, 0.01, 0.1, 1, 10, 100]
C_values = [0.001, 0.01, 0.1, 1, 10, 100]
kernels = ['linear']

# Sweep the hyperparameters relevant to each requested kernel and cross-validate every
# combination. The branches are mutually exclusive, hence elif.
for kernel in kernels:
    print("Kernel: {}".format(kernel))
    if kernel == 'linear':
        for c in C_values:
            svm = SVC(kernel=kernel, C=c, random_state=seed, max_iter=500)
            print("C value: {}".format(c))
            cross_val(X, y, svm, skf)
            print()
    elif kernel == 'poly':
        for c in C_values:
            for deg in poly_deg:
                for gamma in gamma_val:
                    # Uses the loop variable `deg`; this branch previously referenced an
                    # undefined name `degree` and raised NameError when 'poly' was selected.
                    svm = SVC(kernel=kernel, C=c, degree=deg, gamma=gamma, random_state=seed, max_iter=500)
                    print("C: {0}, Degrees: {1}, Gamma: {2}".format(c, deg, gamma))
                    cross_val(X, y, svm, skf)
                    print()
    elif kernel == 'rbf':
        for c in C_values:
            for gamma in gamma_val:
                svm = SVC(kernel=kernel, C=c, gamma=gamma, random_state=seed, max_iter=500)
                print("C: {0}, Gamma: {1}".format(c, gamma))
                cross_val(X, y, svm, skf)
                print()

Kernel: linear
C value: 0.001




No pre-processing, fold 1: 0.44539
Standard pre-processing, fold 1: 0.72182
Min-max pre-processing, fold 1: 0.56177

No pre-processing, fold 2: 0.40859
Standard pre-processing, fold 2: 0.75592
Min-max pre-processing, fold 2: 0.59006

No pre-processing, fold 3: 0.54681
Standard pre-processing, fold 3: 0.75292
Min-max pre-processing, fold 3: 0.55604

No pre-processing, fold 4: 0.42417
Standard pre-processing, fold 4: 0.75147
Min-max pre-processing, fold 4: 0.56947

No pre-processing average: 0.45624
Standard pre-processing average: 0.74553
Min-max pre-processing average: 0.56933

C value: 0.01
No pre-processing, fold 1: 0.37159
Standard pre-processing, fold 1: 0.80939
Min-max pre-processing, fold 1: 0.66865

No pre-processing, fold 2: 0.38232
Standard pre-processing, fold 2: 0.82526
Min-max pre-processing, fold 2: 0.67230

No pre-processing, fold 3: 0.35835
Standard pre-processing, fold 3: 0.81217
Min-max pre-processing, fold 3: 0.60585

No pre-processing, fold 4: 0.44793
Standard pre-pr

In [11]:
# svm kernel:poly
from sklearn.svm import SVC

poly_deg = [1, 2, 3, 4]
gamma_val = [0.001, 0.01, 0.1, 1, 10, 100]
C_values = [0.001, 0.01, 0.1, 1, 10, 100]
kernels = ['poly']

# For each requested kernel, cross-validate every combination of the hyperparameters
# that kernel takes. A kernel name matches at most one branch.
for kernel in kernels:
    print("Kernel: {}".format(kernel))

    if kernel == 'linear':
        # linear: only C is swept
        for c in C_values:
            svm = SVC(kernel=kernel, C=c, random_state=seed, max_iter=500)
            print("C value: {}".format(c))
            cross_val(X, y, svm, skf)
            print()

    elif kernel == 'poly':
        # poly: C x degree x gamma
        for c in C_values:
            for deg in poly_deg:
                for gamma in gamma_val:
                    svm = SVC(
                        kernel=kernel,
                        C=c,
                        degree=deg,
                        gamma=gamma,
                        random_state=seed,
                        max_iter=500,
                    )
                    print("C: {0}, Degrees: {1}, Gamma: {2}".format(c, deg, gamma))
                    cross_val(X, y, svm, skf)
                    print()

    elif kernel == 'rbf':
        # rbf: C x gamma
        for c in C_values:
            for gamma in gamma_val:
                svm = SVC(kernel=kernel, C=c, gamma=gamma, random_state=seed, max_iter=500)
                print("C: {0}, Gamma: {1}".format(c, gamma))
                cross_val(X, y, svm, skf)
                print()

Kernel: poly
C: 0.001, Degrees: 1, Gamma: 0.001




No pre-processing, fold 1: 0.55282
Standard pre-processing, fold 1: 0.46246
Min-max pre-processing, fold 1: 0.52629

No pre-processing, fold 2: 0.55247
Standard pre-processing, fold 2: 0.42978
Min-max pre-processing, fold 2: 0.55789

No pre-processing, fold 3: 0.54470
Standard pre-processing, fold 3: 0.47177
Min-max pre-processing, fold 3: 0.52418

No pre-processing, fold 4: 0.51759
Standard pre-processing, fold 4: 0.51142
Min-max pre-processing, fold 4: 0.53025

No pre-processing average: 0.54189
Standard pre-processing average: 0.46886
Min-max pre-processing average: 0.53465

C: 0.001, Degrees: 1, Gamma: 0.01
No pre-processing, fold 1: 0.61632
Standard pre-processing, fold 1: 0.46318
Min-max pre-processing, fold 1: 0.52392

No pre-processing, fold 2: 0.57571
Standard pre-processing, fold 2: 0.48806
Min-max pre-processing, fold 2: 0.55789

No pre-processing, fold 3: 0.56843
Standard pre-processing, fold 3: 0.47907
Min-max pre-processing, fold 3: 0.52460

No pre-processing, fold 4: 0.5

  'precision', 'predicted', average, warn_for)


No pre-processing, fold 1: 0.34698
Standard pre-processing, fold 1: 0.33935
Min-max pre-processing, fold 1: 0.53314

No pre-processing, fold 2: 0.34580
Standard pre-processing, fold 2: 0.38630
Min-max pre-processing, fold 2: 0.48858

No pre-processing, fold 3: 0.35905
Standard pre-processing, fold 3: 0.32094
Min-max pre-processing, fold 3: 0.49794

No pre-processing, fold 4: 0.43438
Standard pre-processing, fold 4: 0.34032
Min-max pre-processing, fold 4: 0.49645

No pre-processing average: 0.37155
Standard pre-processing average: 0.34673
Min-max pre-processing average: 0.50403

C: 0.001, Degrees: 2, Gamma: 0.01
No pre-processing, fold 1: 0.44781
Standard pre-processing, fold 1: 0.34155
Min-max pre-processing, fold 1: 0.53314

No pre-processing, fold 2: 0.44469
Standard pre-processing, fold 2: 0.38727
Min-max pre-processing, fold 2: 0.48858

No pre-processing, fold 3: 0.32566
Standard pre-processing, fold 3: 0.32320
Min-max pre-processing, fold 3: 0.49794

No pre-processing, fold 4: 0.4

Min-max pre-processing, fold 2: 0.69790

No pre-processing, fold 3: 0.40900
Standard pre-processing, fold 3: 0.76876
Min-max pre-processing, fold 3: 0.69411

No pre-processing, fold 4: 0.37774
Standard pre-processing, fold 4: 0.77750
Min-max pre-processing, fold 4: 0.73324

No pre-processing average: 0.37569
Standard pre-processing average: 0.77733
Min-max pre-processing average: 0.71176

C: 0.001, Degrees: 4, Gamma: 0.001
No pre-processing, fold 1: 0.38382
Standard pre-processing, fold 1: 0.29325
Min-max pre-processing, fold 1: 0.42050

No pre-processing, fold 2: 0.33323
Standard pre-processing, fold 2: 0.32577
Min-max pre-processing, fold 2: 0.40673

No pre-processing, fold 3: 0.29623
Standard pre-processing, fold 3: 0.27231
Min-max pre-processing, fold 3: 0.41743

No pre-processing, fold 4: 0.26662
Standard pre-processing, fold 4: 0.27772
Min-max pre-processing, fold 4: 0.42235

No pre-processing average: 0.31998
Standard pre-processing average: 0.29226
Min-max pre-processing averag

Standard pre-processing, fold 4: 0.84552
Min-max pre-processing, fold 4: 0.76772

No pre-processing average: 0.38065
Standard pre-processing average: 0.84410
Min-max pre-processing average: 0.76922

C: 0.01, Degrees: 1, Gamma: 100
No pre-processing, fold 1: 0.39658
Standard pre-processing, fold 1: 0.77232
Min-max pre-processing, fold 1: 0.81928

No pre-processing, fold 2: 0.42210
Standard pre-processing, fold 2: 0.81759
Min-max pre-processing, fold 2: 0.84060

No pre-processing, fold 3: 0.38940
Standard pre-processing, fold 3: 0.79761
Min-max pre-processing, fold 3: 0.82595

No pre-processing, fold 4: 0.41105
Standard pre-processing, fold 4: 0.77083
Min-max pre-processing, fold 4: 0.83221

No pre-processing average: 0.40478
Standard pre-processing average: 0.78959
Min-max pre-processing average: 0.82951

C: 0.01, Degrees: 2, Gamma: 0.001
No pre-processing, fold 1: 0.44495
Standard pre-processing, fold 1: 0.34001
Min-max pre-processing, fold 1: 0.53314

No pre-processing, fold 2: 0.3700

No pre-processing, fold 1: 0.41903
Standard pre-processing, fold 1: 0.77100
Min-max pre-processing, fold 1: 0.70198

No pre-processing, fold 2: 0.32132
Standard pre-processing, fold 2: 0.78665
Min-max pre-processing, fold 2: 0.70988

No pre-processing, fold 3: 0.34975
Standard pre-processing, fold 3: 0.77405
Min-max pre-processing, fold 3: 0.72184

No pre-processing, fold 4: 0.32698
Standard pre-processing, fold 4: 0.78532
Min-max pre-processing, fold 4: 0.73966

No pre-processing average: 0.35427
Standard pre-processing average: 0.77925
Min-max pre-processing average: 0.71834

C: 0.01, Degrees: 3, Gamma: 100
No pre-processing, fold 1: 0.33100
Standard pre-processing, fold 1: 0.77352
Min-max pre-processing, fold 1: 0.72181

No pre-processing, fold 2: 0.38502
Standard pre-processing, fold 2: 0.78955
Min-max pre-processing, fold 2: 0.69790

No pre-processing, fold 3: 0.40900
Standard pre-processing, fold 3: 0.76876
Min-max pre-processing, fold 3: 0.69411

No pre-processing, fold 4: 0.377

No pre-processing, fold 3: 0.35310
Standard pre-processing, fold 3: 0.84416
Min-max pre-processing, fold 3: 0.76388

No pre-processing, fold 4: 0.39884
Standard pre-processing, fold 4: 0.84685
Min-max pre-processing, fold 4: 0.76772

No pre-processing average: 0.40131
Standard pre-processing average: 0.84384
Min-max pre-processing average: 0.76922

C: 0.1, Degrees: 1, Gamma: 10
No pre-processing, fold 1: 0.38702
Standard pre-processing, fold 1: 0.77366
Min-max pre-processing, fold 1: 0.81926

No pre-processing, fold 2: 0.29182
Standard pre-processing, fold 2: 0.83040
Min-max pre-processing, fold 2: 0.84063

No pre-processing, fold 3: 0.45202
Standard pre-processing, fold 3: 0.79777
Min-max pre-processing, fold 3: 0.82590

No pre-processing, fold 4: 0.36203
Standard pre-processing, fold 4: 0.77746
Min-max pre-processing, fold 4: 0.83217

No pre-processing average: 0.37322
Standard pre-processing average: 0.79482
Min-max pre-processing average: 0.82949

C: 0.1, Degrees: 1, Gamma: 100
No 

No pre-processing, fold 1: 0.33856
Standard pre-processing, fold 1: 0.76919
Min-max pre-processing, fold 1: 0.83649

No pre-processing, fold 2: 0.35480
Standard pre-processing, fold 2: 0.78906
Min-max pre-processing, fold 2: 0.87379

No pre-processing, fold 3: 0.33784
Standard pre-processing, fold 3: 0.76331
Min-max pre-processing, fold 3: 0.86231

No pre-processing, fold 4: 0.34829
Standard pre-processing, fold 4: 0.78994
Min-max pre-processing, fold 4: 0.86508

No pre-processing average: 0.34487
Standard pre-processing average: 0.77787
Min-max pre-processing average: 0.85942

C: 0.1, Degrees: 3, Gamma: 10
No pre-processing, fold 1: 0.41903
Standard pre-processing, fold 1: 0.77100
Min-max pre-processing, fold 1: 0.70867

No pre-processing, fold 2: 0.32132
Standard pre-processing, fold 2: 0.78665
Min-max pre-processing, fold 2: 0.72141

No pre-processing, fold 3: 0.34975
Standard pre-processing, fold 3: 0.77405
Min-max pre-processing, fold 3: 0.71412

No pre-processing, fold 4: 0.32698

No pre-processing, fold 3: 0.42796
Standard pre-processing, fold 3: 0.84474
Min-max pre-processing, fold 3: 0.76388

No pre-processing, fold 4: 0.46641
Standard pre-processing, fold 4: 0.84728
Min-max pre-processing, fold 4: 0.76772

No pre-processing average: 0.41491
Standard pre-processing average: 0.84456
Min-max pre-processing average: 0.76922

C: 1, Degrees: 1, Gamma: 1
No pre-processing, fold 1: 0.43947
Standard pre-processing, fold 1: 0.77346
Min-max pre-processing, fold 1: 0.81926

No pre-processing, fold 2: 0.31972
Standard pre-processing, fold 2: 0.81795
Min-max pre-processing, fold 2: 0.84060

No pre-processing, fold 3: 0.35310
Standard pre-processing, fold 3: 0.79815
Min-max pre-processing, fold 3: 0.82590

No pre-processing, fold 4: 0.41109
Standard pre-processing, fold 4: 0.77074
Min-max pre-processing, fold 4: 0.83221

No pre-processing average: 0.38085
Standard pre-processing average: 0.79007
Min-max pre-processing average: 0.82950

C: 1, Degrees: 1, Gamma: 10
No pre-pr

No pre-processing, fold 1: 0.34558
Standard pre-processing, fold 1: 0.81603
Min-max pre-processing, fold 1: 0.78648

No pre-processing, fold 2: 0.41489
Standard pre-processing, fold 2: 0.86915
Min-max pre-processing, fold 2: 0.80598

No pre-processing, fold 3: 0.36791
Standard pre-processing, fold 3: 0.84326
Min-max pre-processing, fold 3: 0.79670

No pre-processing, fold 4: 0.43988
Standard pre-processing, fold 4: 0.84610
Min-max pre-processing, fold 4: 0.78999

No pre-processing average: 0.39207
Standard pre-processing average: 0.84364
Min-max pre-processing average: 0.79479

C: 1, Degrees: 3, Gamma: 1
No pre-processing, fold 1: 0.33856
Standard pre-processing, fold 1: 0.76919
Min-max pre-processing, fold 1: 0.76635

No pre-processing, fold 2: 0.35480
Standard pre-processing, fold 2: 0.78906
Min-max pre-processing, fold 2: 0.72942

No pre-processing, fold 3: 0.33784
Standard pre-processing, fold 3: 0.76331
Min-max pre-processing, fold 3: 0.71099

No pre-processing, fold 4: 0.34829
St

No pre-processing, fold 3: 0.36716
Standard pre-processing, fold 3: 0.84357
Min-max pre-processing, fold 3: 0.76388

No pre-processing, fold 4: 0.46703
Standard pre-processing, fold 4: 0.84776
Min-max pre-processing, fold 4: 0.76772

No pre-processing average: 0.41776
Standard pre-processing average: 0.84439
Min-max pre-processing average: 0.76922

C: 10, Degrees: 1, Gamma: 0.1
No pre-processing, fold 1: 0.36906
Standard pre-processing, fold 1: 0.77366
Min-max pre-processing, fold 1: 0.81928

No pre-processing, fold 2: 0.32482
Standard pre-processing, fold 2: 0.81730
Min-max pre-processing, fold 2: 0.84063

No pre-processing, fold 3: 0.43280
Standard pre-processing, fold 3: 0.79787
Min-max pre-processing, fold 3: 0.82590

No pre-processing, fold 4: 0.45834
Standard pre-processing, fold 4: 0.77761
Min-max pre-processing, fold 4: 0.83217

No pre-processing average: 0.39626
Standard pre-processing average: 0.79161
Min-max pre-processing average: 0.82950

C: 10, Degrees: 1, Gamma: 1
No pre

No pre-processing, fold 1: 0.31791
Standard pre-processing, fold 1: 0.78216
Min-max pre-processing, fold 1: 0.53027

No pre-processing, fold 2: 0.37032
Standard pre-processing, fold 2: 0.78613
Min-max pre-processing, fold 2: 0.53003

No pre-processing, fold 3: 0.40947
Standard pre-processing, fold 3: 0.79886
Min-max pre-processing, fold 3: 0.50944

No pre-processing, fold 4: 0.35042
Standard pre-processing, fold 4: 0.78523
Min-max pre-processing, fold 4: 0.52848

No pre-processing average: 0.36203
Standard pre-processing average: 0.78809
Min-max pre-processing average: 0.52455

C: 10, Degrees: 3, Gamma: 0.1
No pre-processing, fold 1: 0.34558
Standard pre-processing, fold 1: 0.78006
Min-max pre-processing, fold 1: 0.83743

No pre-processing, fold 2: 0.41489
Standard pre-processing, fold 2: 0.79237
Min-max pre-processing, fold 2: 0.85644

No pre-processing, fold 3: 0.36791
Standard pre-processing, fold 3: 0.76478
Min-max pre-processing, fold 3: 0.85377

No pre-processing, fold 4: 0.43988

No pre-processing, fold 3: 0.39705
Standard pre-processing, fold 3: 0.84606
Min-max pre-processing, fold 3: 0.76388

No pre-processing, fold 4: 0.47518
Standard pre-processing, fold 4: 0.84776
Min-max pre-processing, fold 4: 0.76772

No pre-processing average: 0.41328
Standard pre-processing average: 0.84419
Min-max pre-processing average: 0.76768

C: 100, Degrees: 1, Gamma: 0.01
No pre-processing, fold 1: 0.38045
Standard pre-processing, fold 1: 0.77157
Min-max pre-processing, fold 1: 0.81928

No pre-processing, fold 2: 0.49006
Standard pre-processing, fold 2: 0.83046
Min-max pre-processing, fold 2: 0.84063

No pre-processing, fold 3: 0.42175
Standard pre-processing, fold 3: 0.79735
Min-max pre-processing, fold 3: 0.82590

No pre-processing, fold 4: 0.45952
Standard pre-processing, fold 4: 0.77726
Min-max pre-processing, fold 4: 0.83217

No pre-processing average: 0.43795
Standard pre-processing average: 0.79416
Min-max pre-processing average: 0.82950

C: 100, Degrees: 1, Gamma: 0.1
N

No pre-processing, fold 1: 0.34122
Standard pre-processing, fold 1: 0.35513
Min-max pre-processing, fold 1: 0.46041

No pre-processing, fold 2: 0.37031
Standard pre-processing, fold 2: 0.37403
Min-max pre-processing, fold 2: 0.45346

No pre-processing, fold 3: 0.44416
Standard pre-processing, fold 3: 0.34865
Min-max pre-processing, fold 3: 0.47218

No pre-processing, fold 4: 0.31011
Standard pre-processing, fold 4: 0.36160
Min-max pre-processing, fold 4: 0.45844

No pre-processing average: 0.36645
Standard pre-processing average: 0.35985
Min-max pre-processing average: 0.46112

C: 100, Degrees: 3, Gamma: 0.01
No pre-processing, fold 1: 0.31791
Standard pre-processing, fold 1: 0.84946
Min-max pre-processing, fold 1: 0.65550

No pre-processing, fold 2: 0.37032
Standard pre-processing, fold 2: 0.87240
Min-max pre-processing, fold 2: 0.67499

No pre-processing, fold 3: 0.40947
Standard pre-processing, fold 3: 0.86203
Min-max pre-processing, fold 3: 0.69588

No pre-processing, fold 4: 0.350

No pre-processing, fold 3: 0.16748
Standard pre-processing, fold 3: 0.72089
Min-max pre-processing, fold 3: 0.70961

No pre-processing, fold 4: 0.16940
Standard pre-processing, fold 4: 0.73807
Min-max pre-processing, fold 4: 0.70851

No pre-processing average: 0.17534
Standard pre-processing average: 0.75617
Min-max pre-processing average: 0.71793



In [12]:
# sklearn neural network: sweep hidden-layer layouts against L2 penalty (alpha)
from sklearn.neural_network import MLPClassifier

# Written as plain ints: (20) and (100) are parenthesised ints, not 1-tuples.
hidden_layers = [20, 100, (20, 20), (100, 100)]
alpha_values = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]

for hl in hidden_layers:
    for alpha in alpha_values:
        print("Hidden layers: {}. Alpha value: {}".format(hl, alpha))
        clf = MLPClassifier(
            alpha=alpha,
            hidden_layer_sizes=hl,
            random_state=seed,
        )
        cross_val(X, y, clf, skf)
        print()

Hidden layers: 20. Alpha value: 0.0001
No pre-processing, fold 1: 0.70059
Standard pre-processing, fold 1: 0.85374




Min-max pre-processing, fold 1: 0.81250

No pre-processing, fold 2: 0.67384
Standard pre-processing, fold 2: 0.86672
Min-max pre-processing, fold 2: 0.82909

No pre-processing, fold 3: 0.61731
Standard pre-processing, fold 3: 0.85348
Min-max pre-processing, fold 3: 0.83060

No pre-processing, fold 4: 0.68155
Standard pre-processing, fold 4: 0.86501
Min-max pre-processing, fold 4: 0.81998

No pre-processing average: 0.66832
Standard pre-processing average: 0.85974
Min-max pre-processing average: 0.82304

Hidden layers: 20. Alpha value: 0.001
No pre-processing, fold 1: 0.69365
Standard pre-processing, fold 1: 0.85371
Min-max pre-processing, fold 1: 0.81744

No pre-processing, fold 2: 0.57693
Standard pre-processing, fold 2: 0.86718
Min-max pre-processing, fold 2: 0.83798

No pre-processing, fold 3: 0.61661
Standard pre-processing, fold 3: 0.85388
Min-max pre-processing, fold 3: 0.83333

No pre-processing, fold 4: 0.68971
Standard pre-processing, fold 4: 0.86469
Min-max pre-processing, fo

  'precision', 'predicted', average, warn_for)


Standard pre-processing, fold 1: 0.33344
Min-max pre-processing, fold 1: 0.02222

No pre-processing, fold 2: 0.64540
Standard pre-processing, fold 2: 0.34015
Min-max pre-processing, fold 2: 0.02222

No pre-processing, fold 3: 0.51046
Standard pre-processing, fold 3: 0.33394
Min-max pre-processing, fold 3: 0.02222

No pre-processing, fold 4: 0.64214
Standard pre-processing, fold 4: 0.34815
Min-max pre-processing, fold 4: 0.02222

No pre-processing average: 0.60386
Standard pre-processing average: 0.33892
Min-max pre-processing average: 0.02222

Hidden layers: 100. Alpha value: 0.0001
No pre-processing, fold 1: 0.67230
Standard pre-processing, fold 1: 0.87557
Min-max pre-processing, fold 1: 0.82020

No pre-processing, fold 2: 0.61063
Standard pre-processing, fold 2: 0.88456
Min-max pre-processing, fold 2: 0.84061

No pre-processing, fold 3: 0.64280
Standard pre-processing, fold 3: 0.87684
Min-max pre-processing, fold 3: 0.84626

No pre-processing, fold 4: 0.58590
Standard pre-processing,

Standard pre-processing, fold 2: 0.86668
Min-max pre-processing, fold 2: 0.81390

No pre-processing, fold 3: 0.60357
Standard pre-processing, fold 3: 0.86310
Min-max pre-processing, fold 3: 0.81777

No pre-processing, fold 4: 0.63652
Standard pre-processing, fold 4: 0.85583
Min-max pre-processing, fold 4: 0.81465

No pre-processing average: 0.64342
Standard pre-processing average: 0.85942
Min-max pre-processing average: 0.81533

Hidden layers: (20, 20). Alpha value: 1
No pre-processing, fold 1: 0.57110
Standard pre-processing, fold 1: 0.84531
Min-max pre-processing, fold 1: 0.77297

No pre-processing, fold 2: 0.61774
Standard pre-processing, fold 2: 0.85269
Min-max pre-processing, fold 2: 0.78214

No pre-processing, fold 3: 0.60437
Standard pre-processing, fold 3: 0.85587
Min-max pre-processing, fold 3: 0.78316

No pre-processing, fold 4: 0.57526
Standard pre-processing, fold 4: 0.84807
Min-max pre-processing, fold 4: 0.78423

No pre-processing average: 0.59212
Standard pre-processing 

In [13]:
# sklearn neural network: second sweep with wider layers and a shorter alpha range
from sklearn.neural_network import MLPClassifier

# Written as plain ints: (30) and (200) are parenthesised ints, not 1-tuples.
hidden_layers = [30, 200, (30, 30), (200, 200)]
alpha_values = [0.0001, 0.001, 0.01, 0.1, 1, 10]

for hl in hidden_layers:
    for alpha in alpha_values:
        print("Hidden layers: {}. Alpha value: {}".format(hl, alpha))
        clf = MLPClassifier(
            alpha=alpha,
            hidden_layer_sizes=hl,
            random_state=seed,
        )
        cross_val(X, y, clf, skf)
        print()

Hidden layers: 30. Alpha value: 0.0001
No pre-processing, fold 1: 0.68279
Standard pre-processing, fold 1: 0.85978
Min-max pre-processing, fold 1: 0.81971

No pre-processing, fold 2: 0.68810
Standard pre-processing, fold 2: 0.87202
Min-max pre-processing, fold 2: 0.83397

No pre-processing, fold 3: 0.72283
Standard pre-processing, fold 3: 0.86767
Min-max pre-processing, fold 3: 0.83869

No pre-processing, fold 4: 0.71348




Standard pre-processing, fold 4: 0.87029
Min-max pre-processing, fold 4: 0.82881

No pre-processing average: 0.70180
Standard pre-processing average: 0.86744
Min-max pre-processing average: 0.83030

Hidden layers: 30. Alpha value: 0.001
No pre-processing, fold 1: 0.67243
Standard pre-processing, fold 1: 0.85892
Min-max pre-processing, fold 1: 0.82787

No pre-processing, fold 2: 0.68188
Standard pre-processing, fold 2: 0.87224
Min-max pre-processing, fold 2: 0.82739

No pre-processing, fold 3: 0.67319
Standard pre-processing, fold 3: 0.87236
Min-max pre-processing, fold 3: 0.83273

No pre-processing, fold 4: 0.71946
Standard pre-processing, fold 4: 0.86661
Min-max pre-processing, fold 4: 0.82940

No pre-processing average: 0.68674
Standard pre-processing average: 0.86753
Min-max pre-processing average: 0.82935

Hidden layers: 30. Alpha value: 0.01
No pre-processing, fold 1: 0.66367
Standard pre-processing, fold 1: 0.85892
Min-max pre-processing, fold 1: 0.80745

No pre-processing, fold 

  'precision', 'predicted', average, warn_for)


No pre-processing, fold 2: 0.63675
Standard pre-processing, fold 2: 0.89117
Min-max pre-processing, fold 2: 0.84613

No pre-processing, fold 3: 0.59034
Standard pre-processing, fold 3: 0.88120
Min-max pre-processing, fold 3: 0.84341

No pre-processing, fold 4: 0.62935
Standard pre-processing, fold 4: 0.88347
Min-max pre-processing, fold 4: 0.83547

No pre-processing average: 0.61083
Standard pre-processing average: 0.88266
Min-max pre-processing average: 0.83859

Hidden layers: 200. Alpha value: 0.01
No pre-processing, fold 1: 0.54068
Standard pre-processing, fold 1: 0.87819
Min-max pre-processing, fold 1: 0.83012

No pre-processing, fold 2: 0.57234
Standard pre-processing, fold 2: 0.88858
Min-max pre-processing, fold 2: 0.84240

No pre-processing, fold 3: 0.64803
Standard pre-processing, fold 3: 0.87817
Min-max pre-processing, fold 3: 0.84586

No pre-processing, fold 4: 0.60793
Standard pre-processing, fold 4: 0.88430
Min-max pre-processing, fold 4: 0.83565

No pre-processing average:

No pre-processing, fold 3: 0.60767
Standard pre-processing, fold 3: 0.88461
Min-max pre-processing, fold 3: 0.84486

No pre-processing, fold 4: 0.60322
Standard pre-processing, fold 4: 0.87805
Min-max pre-processing, fold 4: 0.83156

No pre-processing average: 0.53683
Standard pre-processing average: 0.88522
Min-max pre-processing average: 0.83400

Hidden layers: (200, 200). Alpha value: 0.001
No pre-processing, fold 1: 0.22600
Standard pre-processing, fold 1: 0.87985
Min-max pre-processing, fold 1: 0.82299

No pre-processing, fold 2: 0.50765
Standard pre-processing, fold 2: 0.89986
Min-max pre-processing, fold 2: 0.83683

No pre-processing, fold 3: 0.55286
Standard pre-processing, fold 3: 0.88233
Min-max pre-processing, fold 3: 0.85591

No pre-processing, fold 4: 0.27240
Standard pre-processing, fold 4: 0.88092
Min-max pre-processing, fold 4: 0.83253

No pre-processing average: 0.38973
Standard pre-processing average: 0.88574
Min-max pre-processing average: 0.83706

Hidden layers: (20