In [6]:
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 30 14:31:40 2017
Authors: K132047 | Sahir
           
Data Science Project
Code:   Presents a Comparison of Different Classifiers and
        Applies Multi-Layer Perceptron Classifier on the UCI
        Poker Hand Data Set
"""
#-------------------------------------------------------------------------
# All the Libraries: 
#------------------------------------------------------------------------- 
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.multiclass import OutputCodeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn import svm
#----------------------------------------------------------------
# Load the training and testing splits from the local CSV files
#----------------------------------------------------------------
# header=None: the first row of each file is read as data, not as column names.
data_train = pd.read_csv("poker-hand-training-true.data", sep=",", header=None)
data_test = pd.read_csv("poker-hand-testing.data", sep=",", header=None)
#----------------------------------------------------------------
# Print the shape of each split to get an idea of the data set
#----------------------------------------------------------------
for frame in (data_train, data_test):
    print(frame.shape)
#----------------------------------------------------------------
# Separate each split into feature columns and the label column
#----------------------------------------------------------------
# Training split: columns 0-9 become the features, column 10 the label
array_train = data_train.values
data_train, label_train = array_train[:, :10], array_train[:, 10]
# Testing split: same column layout as the training split
array_test = data_test.values
data_test, label_test = array_test[:, :10], array_test[:, 10]
#----------------------------------------------------------------
# Standardize the features for our main model
#----------------------------------------------------------------
# Scaling makes it easier for the neural network to converge.
# The scaler is fitted on the training features only; that same fitted
# transform is then applied to both the training and the testing features.
scaler = StandardScaler().fit(data_train)
data_train, data_test = scaler.transform(data_train), scaler.transform(data_test)
#----------------------------------------------------------------
# Train and evaluate the MLPClassifier once per random seed
#----------------------------------------------------------------
# The accuracy list grows with the loop, so changing the seeds below can
# never leave it the wrong size or index it out of range.
seeds = range(1, 6)
acc_array = []
for s in seeds:
    # Init MLPClassifier; only random_state changes between runs
    clf = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(64, 64),
                        activation='tanh', learning_rate_init=0.02, max_iter=2000,
                        random_state=s)
    # Fit the model on the training split (`result` keeps fit()'s return value)
    result = clf.fit(data_train, label_train)
    # Predict the testing split and score it
    prediction = clf.predict(data_test)
    acc = accuracy_score(label_test, prediction)
    acc_array.append(acc)
    # Per-seed results: per-class metrics, overall accuracy, confusion matrix
    print(classification_report(label_test, prediction))
    print("Accuracy using MLPClassifier and Random Seed:", s, ":", str(acc))
    print(confusion_matrix(label_test, prediction))
# Summary over all seeds
print("Mean Accuracy using MLPClassifier Classifier: ", np.mean(acc_array))
#----------------------------------------------------------------
# Init the Models for Comparison
#----------------------------------------------------------------
# Each label is written next to the estimator it describes so the two cannot
# drift apart; `models` and `model_names` are derived from these pairs and
# keep the same order as before.
model_specs = [
    ("Bagging with DT", BaggingClassifier()),
    ("Random Forest", RandomForestClassifier()),
    ("AdaBoost", AdaBoostClassifier()),
    ("KNN", KNeighborsClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("Decision Tree", tree.DecisionTreeClassifier()),
    ("Linear SVM", svm.SVC(kernel='linear', C=1)),
    # This entry wraps a BaggingClassifier (not a linear SVM), so the label says so.
    ("OutputCodeClassifier with Bagging", OutputCodeClassifier(BaggingClassifier())),
    ("OneVsRestClassifier with Linear SVM", OneVsRestClassifier(svm.SVC(kernel='linear'))),
]

models = [model for _, model in model_specs]
model_names = [name for name, _ in model_specs]
#----------------------------------------------------------------
# Fit, predict and report for each comparison model
#----------------------------------------------------------------
for model, name in zip(models, model_names):
    model.fit(data_train, label_train)
    # Display the relative importance of each attribute for the random forest.
    # The check is on the estimator's type rather than its display label, so
    # editing the label text cannot silently switch this output off.
    if isinstance(model, RandomForestClassifier):
        print(model.feature_importances_)
    # Predict the testing split
    prediction = model.predict(data_test)
    # Print overall accuracy, then per-class metrics and the confusion matrix
    acc = accuracy_score(label_test, prediction)
    print("Accuracy Using", name, ": " + str(acc) + '\n')
    print(classification_report(label_test, prediction))
    print(confusion_matrix(label_test, prediction))


(25010, 11)
(1000000, 11)




              precision    recall  f1-score   support

           0       0.99      0.99      0.99    501209
           1       1.00      0.99      0.99    422498
           2       0.90      0.97      0.93     47622
           3       0.90      0.90      0.90     21121
           4       0.70      0.06      0.11      3885
           5       0.17      0.22      0.20      1996
           6       0.78      0.47      0.59      1424
           7       0.39      0.36      0.37       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.98      0.98      0.98   1000000
   macro avg       0.58      0.50      0.51   1000000
weighted avg       0.98      0.98      0.98   1000000

Accuracy using MLPClassifier and Random Seed: 1 : 0.981586
[[498081   1210      0      0     82   1708      0      0    124      4]
 [  1658 417025   3115    117     17    413      0      0    152      1]
 [     0    484  45985   1123      0

[[380078 120481    539     78     15     17      0      0      1      0]
 [216808 201524   3365    712     74      4      8      1      1      1]
 [ 14946  30854   1575    232     14      0      1      0      0      0]
 [  5349  14653    553    553      8      0      5      0      0      0]
 [   918   2850     67     23     25      0      0      0      0      2]
 [  1556    429      1      0      0     10      0      0      0      0]
 [   175   1084     99     60      1      0      5      0      0      0]
 [    21    160     29     20      0      0      0      0      0      0]
 [     4      8      0      0      0      0      0      0      0      0]
 [     1      2      0      0      0      0      0      0      0      0]]




[0.06028058 0.14108714 0.06824387 0.13077554 0.06116078 0.14253356
 0.06551131 0.13400471 0.057808   0.13859452]
Accuracy Using Random Forest : 0.554046



  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.58      0.74      0.65    501209
           1       0.51      0.43      0.47    422498
           2       0.20      0.01      0.03     47622
           3       0.25      0.01      0.02     21121
           4       0.12      0.00      0.00      3885
           5       0.70      0.02      0.03      1996
           6       0.00      0.00      0.00      1424
           7       0.00      0.00      0.00       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.55      0.55      0.55   1000000
   macro avg       0.24      0.12      0.12   1000000
weighted avg       0.52      0.55      0.53   1000000

[[370571 130095    446     77      8     11      0      0      1      0]
 [237670 182573   1897    308     42      3      2      1      2      0]
 [ 19432  27395    689    101      2      0      3      0      0      0]
 [  7141  13543    255

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.50      0.98      0.66    501209
           1       0.00      0.00      0.00    422498
           2       0.00      0.00      0.00     47622
           3       0.00      0.00      0.00     21121
           4       0.00      0.00      0.00      3885
           5       0.00      0.00      0.00      1996
           6       0.00      0.00      0.00      1424
           7       0.00      0.00      0.00       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.49      0.49      0.49   1000000
   macro avg       0.05      0.10      0.07   1000000
weighted avg       0.25      0.49      0.33   1000000

[[491576      0      0      0      0      0      0      0      0   9633]
 [414414      0      0      0      0      0      0      0      0   8084]
 [ 46709      0      0      0      0      0      0      0      0    913]
 [ 20729      0      0

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.55      0.66      0.60    501209
           1       0.46      0.42      0.44    422498
           2       0.16      0.01      0.02     47622
           3       0.11      0.01      0.01     21121
           4       0.04      0.00      0.00      3885
           5       0.58      0.02      0.04      1996
           6       0.00      0.00      0.00      1424
           7       0.00      0.00      0.00       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.51      0.51      0.51   1000000
   macro avg       0.19      0.11      0.11   1000000
weighted avg       0.48      0.51      0.49   1000000

[[331578 168515    883    197     15     21      0      0      0      0]
 [240298 179460   2025    621     79     12      2      0      0      1]
 [ 23707  23080    626    182     25      0      1      0      1      0]
 [  8406  12283    296

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.50      1.00      0.67    501209
           1       0.00      0.00      0.00    422498
           2       0.00      0.00      0.00     47622
           3       0.00      0.00      0.00     21121
           4       0.00      0.00      0.00      3885
           5       0.00      0.00      0.00      1996
           6       0.00      0.00      0.00      1424
           7       0.00      0.00      0.00       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.50      0.50      0.50   1000000
   macro avg       0.05      0.10      0.07   1000000
weighted avg       0.25      0.50      0.33   1000000

[[501209      0      0      0      0      0      0      0      0      0]
 [422498      0      0      0      0      0      0      0      0      0]
 [ 47622      0      0      0      0      0      0      0      0      0]
 [ 21121      0      0

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           0       0.50      1.00      0.67    501209
           1       0.00      0.00      0.00    422498
           2       0.00      0.00      0.00     47622
           3       0.00      0.00      0.00     21121
           4       0.00      0.00      0.00      3885
           5       0.00      0.00      0.00      1996
           6       0.00      0.00      0.00      1424
           7       0.00      0.00      0.00       230
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00         3

   micro avg       0.50      0.50      0.50   1000000
   macro avg       0.05      0.10      0.07   1000000
weighted avg       0.25      0.50      0.33   1000000

[[501209      0      0      0      0      0      0      0      0      0]
 [422498      0      0      0      0      0      0      0      0      0]
 [ 47622      0      0      0      0      0      0      0      0      0]
 [ 21121      0      0

AttributeError: module 'sklearn.tree' has no attribute 'plot_tree'