# Multiple Classifiers

In [1]:
url = 'https://raw.githubusercontent.com/Sketchjar/Datasets/master/bank-full.csv'

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
df = pd.read_csv(url,sep=';')

In [4]:
df_job = pd.get_dummies(df['job'],drop_first=True)
df_marital = pd.get_dummies(df['marital'],drop_first=True)
df_education = pd.get_dummies(df['education'],drop_first=True)
df_contact = pd.get_dummies(df['contact'],drop_first=True)
df_month = pd.get_dummies(df['month'],drop_first=True)
df_poutcome = pd.get_dummies(df['poutcome'],drop_first=True)

In [5]:
def change(x):
    if x == 'yes':
        return 1
    elif x == 'no':
        return 0
    else:
        return x

In [6]:
df['default'] = df['default'].apply(change)
df['housing'] = df['housing'].apply(change)
df['loan'] = df['loan'].apply(change)
df['y'] = df['y'].apply(change)

In [7]:
df = pd.concat([df,df_job,df_marital,df_education,df_contact,df_month,df_poutcome],axis=1)

In [8]:
df.drop(['job','marital','education','contact','month','poutcome'],axis=1,inplace=True)

In [9]:
df.head(5)

Unnamed: 0,age,default,balance,housing,loan,day,duration,campaign,pdays,previous,...,jul,jun,mar,may,nov,oct,sep,other,success,unknown
0,58,0,2143,1,0,5,261,1,-1,0,...,0,0,0,1,0,0,0,0,0,1
1,44,0,29,1,0,5,151,1,-1,0,...,0,0,0,1,0,0,0,0,0,1
2,33,0,2,1,1,5,76,1,-1,0,...,0,0,0,1,0,0,0,0,0,1
3,47,0,1506,1,0,5,92,1,-1,0,...,0,0,0,1,0,0,0,0,0,1
4,33,0,1,0,0,5,198,1,-1,0,...,0,0,0,1,0,0,0,0,0,1


In [13]:
from sklearn.cross_validation import train_test_split

In [14]:
X = df.drop('y',axis=1)
y = df['y']

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

In [11]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.dummy import DummyClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

In [12]:
classifers = {
    "Dummy":DummyClassifier(strategy='uniform',random_state=2),
    "KNN(3)":KNeighborsClassifier(3),
    "Logistic_Regression": LogisticRegression(),
    "Decision_Tree": DecisionTreeClassifier(max_depth=7),
    "Random Forest": RandomForestClassifier(max_depth=7,n_estimators=10,max_features=4),
    "Neural_Net": MLPClassifier(alpha=1),
    "AdaBoost": AdaBoostClassifier(),
    "Naives_Bayes": GaussianNB(),
    "QDA": QuadraticDiscriminantAnalysis(),
    "RBFSVM": SVC(gamma=2, C=1),
    "linearSVC": LinearSVC(),
    "LinearSVM": SVC(kernel='linear'),
    "Gaussian_Process": GaussianProcessClassifier(1.0 * RBF(1.0))
}

In [17]:
from time import time
nfast = 10

head = list(classifers.items())[:nfast]

for name,classifier in head:
    start = time()
    classifier.fit(X_train,y_train)
    train_time = time() - start
    start = time()
    score = classifier.score(X_test,y_test)
    score_time = time() - start
    print("{:<15} | score = {:.3f} | time = {:,.3f}s | {:,.3f}s".format(name,score,train_time,score_time))

Dummy           | score = 0.497 | time = 0.007s | 0.012s
KNN(3)          | score = 0.876 | time = 0.208s | 0.448s
Logistic_Regression | score = 0.902 | time = 0.816s | 0.013s
Decision_Tree   | score = 0.902 | time = 0.169s | 0.004s
Random Forest   | score = 0.892 | time = 0.215s | 0.022s
Neural_Net      | score = 0.865 | time = 1.382s | 0.012s
AdaBoost        | score = 0.901 | time = 1.589s | 0.068s
Naives_Bayes    | score = 0.861 | time = 0.051s | 0.014s
QDA             | score = 0.870 | time = 0.185s | 0.019s
RBFSVM          | score = 0.878 | time = 363.504s | 21.498s
