In [12]:
"""
==============================================
Using multilayer perceptron for classification
==============================================

"""

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.cross_validation import train_test_split, cross_val_score, StratifiedShuffleSplit 
from sklearn import preprocessing, metrics
from sklearn.metrics import roc_curve, auc
import timeit
import matplotlib.pyplot as plt
get_ipython().magic('matplotlib inline')
from multilayer_perceptron  import MultilayerPerceptronClassifier


In [13]:
## Reading the file
def read_file(trainF,testF, Directory):
    #Directory = "./Data Set 1/splits/"
    train = pd.read_csv(Directory + trainF)
    test =  pd.read_csv(Directory + testF)
    return train, test

In [14]:
## MLP classifier
def mlp_classifier(train, test, accuracy, roc_auc, Target_col,pos_label=None):
    start_time = timeit.default_timer()
    y = train[Target_col]
    X = train.drop([Target_col],axis=1)
    test_labels = test[Target_col]
    test_X = test.drop([Target_col],axis=1)
    clf = MultilayerPerceptronClassifier()
    clf.fit(X,y)
    y_pred = clf.predict(test_X)
    accuracy.append(metrics.accuracy_score(test_labels, y_pred))
    probas_ = clf.predict_proba(test_X)
    # Compute ROC curve and area the curve
    false_positive_rate, true_positive_rate, thresholds = roc_curve(test_labels, probas_[:, 1],pos_label=pos_label)
    roc_auc.append(auc(false_positive_rate, true_positive_rate))
    elapsed = (timeit.default_timer() - start_time)/60
    return accuracy, roc_auc, elapsed

In [15]:
def model_build(filenum,Target_column, df_train, df_test, Directory,pos_label=None):
    accuracy_mlp = []; roc_auc_mlp = []
    Target_col = Target_column
    elapsed_time_mlp = []
    for i in range(1,11):
        trainF = df_train+ str(i) + '.csv'
        testF = df_test + str(i) + '.csv'
        train, test = read_file(trainF,testF,Directory)
        accuracy_mlp, roc_auc_mlp, elapsed = mlp_classifier(train, test, accuracy_mlp, roc_auc_mlp, Target_col,pos_label)
        elapsed_time_mlp.append(elapsed)

    print('Data set# ' + str(filenum))
    print('********** MLP classifier ***********')
    print('Individual file accuracy for MLP')
    print(np.array(accuracy_mlp))
    print('Individual time taken for MLP')
    print(np.array(elapsed_time_mlp))
    print('Accuracy mean   ' + 'Accuracy Stdev  ')
    print(np.array(accuracy_mlp).mean(), np.array(accuracy_mlp).std())
    print('Individual file AUC for MLP')
    print(np.array(roc_auc_mlp))
    print('AUC mean        ' + 'AUC      Stdev  ')
    print(np.array(roc_auc_mlp).mean(), np.array(roc_auc_mlp).std())
    print()

In [16]:
model_build(filenum=1,Target_column='Occupancy', df_train='d1_train', df_test='d1_test', Directory = "./Data Set 1/splits/")

Data set# 1
********** MLP classifier ***********
Individual file accuracy for MLP
[ 0.99402046  0.94785847  0.79020651  0.94813084  0.99335653  0.78862794
  0.78812125  0.94693124  0.79035509  0.99408192]
Individual time taken for MLP
[ 0.07359427  0.02964645  0.00819182  0.0393928   0.08714583  0.02426808
  0.0217269   0.03005011  0.02474961  0.07826636]
Accuracy mean   Accuracy Stdev  
0.898169025034 0.0906531476059
Individual file AUC for MLP
[ 0.99725697  0.96395746  0.5         0.96369044  0.99777457  0.5         0.5
  0.96473696  0.5         0.99696388]
AUC mean        AUC      Stdev  
0.788438028108 0.235859692687



In [17]:
model_build(filenum=3,Target_column='Class', df_train='d3_train', df_test='d3_test', Directory = "./Data Set 3/splits/",pos_label=2)

Data set# 3
********** MLP classifier ***********
Individual file accuracy for MLP
[ 0.81526899  0.81716745  0.81850846  0.81424271  0.817045    0.81857109
  0.81385966  0.82219922  0.81316484  0.81199236]
Individual time taken for MLP
[ 0.19949222  0.18150373  0.19037195  0.15607547  0.15843363  0.19739085
  0.1604215   0.2277425   0.1632268   0.14695876]
Accuracy mean   Accuracy Stdev  
0.816201978309 0.00292716682126
Individual file AUC for MLP
[ 0.87107679  0.87331367  0.87454286  0.8680112   0.87093263  0.86709667
  0.86787613  0.87766201  0.86694778  0.86708087]
AUC mean        AUC      Stdev  
0.870454060431 0.00353588364365



In [18]:
model_build(filenum=5,Target_column='y', df_train='d5_train', df_test='d5_test', Directory = "./Data Set 5/splits/",pos_label=None)

Data set# 5
********** MLP classifier ***********
Individual file accuracy for MLP
[ 0.83170576  0.8250367   0.76553889  0.82892262  0.87820106  0.57007563
  0.8391948   0.86821809  0.84359379  0.87049878]
Individual time taken for MLP
[ 0.07644033  0.07352272  0.10372312  0.10755043  0.08681573  0.04253161
  0.06101409  0.09807952  0.03985525  0.10414775]
Accuracy mean   Accuracy Stdev  
0.812098612611 0.0861588481203
Individual file AUC for MLP
[ 0.46831676  0.46548408  0.29322024  0.49610506  0.49354631  0.51450199
  0.46918661  0.35956919  0.4573502   0.47625672]
AUC mean        AUC      Stdev  
0.449353716229 0.0652860927209



In [19]:
model_build(filenum=9,Target_column='Income level', df_train='d9_train', df_test='d9_test', Directory = "./Data Set 9/splits/",pos_label=None)

Data set# 9
********** MLP classifier ***********
Individual file accuracy for MLP
[ 0.81874387  0.81810578  0.81848739  0.82471546  0.82103074  0.82231794
  0.81592842  0.82242299  0.8187391   0.81942733]
Individual time taken for MLP
[ 0.21082039  0.27596355  0.21683327  0.25650949  0.22437981  0.26444234
  0.29896545  0.23430989  0.27512945  0.21423049]
Accuracy mean   Accuracy Stdev  
0.819991902707 0.00245948201597
Individual file AUC for MLP
[ 0.8618933   0.86415747  0.86404769  0.86704887  0.86918196  0.86959377
  0.86598882  0.87015829  0.86742352  0.86715908]
AUC mean        AUC      Stdev  
0.866665275056 0.0025386423644



In [20]:
model_build(filenum=11,Target_column='C7130', df_train='amlall_train', df_test='amlall_test', Directory = "./data11_amlalll/",pos_label='AML')

Data set# 11
********** MLP classifier ***********
Individual file accuracy for MLP
[ 0.77272727  0.61538462  0.52941176  0.58333333  0.6         0.56521739
  0.64        0.55172414  0.62962963  0.62962963]
Individual time taken for MLP
[ 0.09967528  0.06094197  0.08739951  0.05249316  0.08406341  0.11769849
  0.0821827   0.05540164  0.08308128  0.07916135]
Accuracy mean   Accuracy Stdev  
0.611705777465 0.0639201378626
Individual file AUC for MLP
[ 0.5         0.49673203  0.60888889  0.5         0.5         0.5
  0.47058824  0.5         0.5         0.5       ]
AUC mean        AUC      Stdev  
0.507620915033 0.0348593433965



In [21]:
model_build(filenum=13,Target_column='C7130', df_train='central_train', df_test='central_test', Directory = "./data13_central/",pos_label='Class1')

Data set# 13
********** MLP classifier ***********
Individual file accuracy for MLP
[ 0.58823529  0.66666667  0.52173913  0.61538462  0.58333333  0.47619048
  0.41666667  0.35294118  0.55555556  0.4375    ]
Individual time taken for MLP
[ 0.08435838  0.09304627  0.08068079  0.09020802  0.07994211  0.09096792
  0.08338422  0.07528344  0.07630121  0.08065561]
Accuracy mean   Accuracy Stdev  
0.521421291482 0.0937205828629
Individual file AUC for MLP
[ 0.47115385  0.41176471  0.42916667  0.5375      0.5         0.49545455
  0.4375      0.5         0.56923077  0.5       ]
AUC mean        AUC      Stdev  
0.485177053339 0.0464368301233



In [22]:
model_build(filenum=16,Target_column='TARGET', df_train='d16_original_train', df_test='d16_original_test', Directory = "./Original Dataset_16_Prostrate/",pos_label=None)

Data set# 16
********** MLP classifier ***********
Individual file accuracy for MLP
[ 0.97058824  0.97619048  1.          1.          0.97142857  0.96666667
  1.          0.95918367  0.94594595  1.        ]
Individual time taken for MLP
[ 0.12407472  0.13385019  0.10884812  0.13674891  0.11753037  0.12844653
  0.12341076  0.12131069  0.09498585  0.08934704]
Accuracy mean   Accuracy Stdev  
0.9790003569 0.0188078338373
Individual file AUC for MLP
[ 1.          0.99278846  1.          1.          0.99        0.99111111
  1.          0.99665552  0.98809524  1.        ]
AUC mean        AUC      Stdev  
0.995865032914 0.00461462910591

