In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

%matplotlib inline

In [2]:
df = pd.read_csv('ITIM_Format_Train.csv')

In [3]:
df.head()

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Titles,Number_Names,Ticket Prefix,male,Q,S
0,0,3,22.0,1,0,7.25,0,4,1,1,0,1
1,1,1,38.0,1,0,71.2833,0,7,1,0,0,0
2,1,3,26.0,0,0,7.925,0,3,1,0,0,1
3,1,1,35.0,1,0,53.1,0,7,0,0,0,1
4,0,3,35.0,0,0,8.05,0,4,0,1,0,1


In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X = df.drop(['Survived'],axis=1)
y = df['Survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [8]:
from sklearn.ensemble import AdaBoostClassifier

In [9]:
clf = AdaBoostClassifier(n_estimators=100)
clf.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=100, random_state=None)

In [10]:
predictions = clf.predict(X_test)

In [11]:
from sklearn.metrics import classification_report,confusion_matrix

In [12]:
print(confusion_matrix(y_test,predictions))
print('\n')
print(classification_report(y_test,predictions))

[[145  21]
 [ 24  77]]


              precision    recall  f1-score   support

           0       0.86      0.87      0.87       166
           1       0.79      0.76      0.77       101

    accuracy                           0.83       267
   macro avg       0.82      0.82      0.82       267
weighted avg       0.83      0.83      0.83       267



In [20]:
from sklearn.model_selection import GridSearchCV

In [29]:
param_grid = {
 'n_estimators': [50, 100],
 'learning_rate' : [0.01,0.05,0.1,0.3,1],
 'algorithm' : ['SAMME', 'SAMME.R']
 }

hp_clf = GridSearchCV(AdaBoostClassifier(),
                            param_grid=param_grid,
                            cv=3,verbose=2)

hp_clf.fit(X_train, y_train)

Fitting 3 folds for each of 20 candidates, totalling 60 fits
[CV] algorithm=SAMME, learning_rate=0.01, n_estimators=50 ............
[CV]  algorithm=SAMME, learning_rate=0.01, n_estimators=50, total=   0.1s
[CV] algorithm=SAMME, learning_rate=0.01, n_estimators=50 ............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV]  algorithm=SAMME, learning_rate=0.01, n_estimators=50, total=   0.1s
[CV] algorithm=SAMME, learning_rate=0.01, n_estimators=50 ............
[CV]  algorithm=SAMME, learning_rate=0.01, n_estimators=50, total=   0.1s
[CV] algorithm=SAMME, learning_rate=0.01, n_estimators=100 ...........
[CV]  algorithm=SAMME, learning_rate=0.01, n_estimators=100, total=   0.2s
[CV] algorithm=SAMME, learning_rate=0.01, n_estimators=100 ...........
[CV]  algorithm=SAMME, learning_rate=0.01, n_estimators=100, total=   0.2s
[CV] algorithm=SAMME, learning_rate=0.01, n_estimators=100 ...........
[CV]  algorithm=SAMME, learning_rate=0.01, n_estimators=100, total=   0.2s
[CV] algorithm=SAMME, learning_rate=0.05, n_estimators=50 ............
[CV]  algorithm=SAMME, learning_rate=0.05, n_estimators=50, total=   0.1s
[CV] algorithm=SAMME, learning_rate=0.05, n_estimators=50 ............
[CV]  algorithm=SAMME, learning_rate=0.05, n_estimators=50, total=   0.2s
[CV] algorithm=SAMME, learning_rate=0.05, n_estimator

[CV]  algorithm=SAMME.R, learning_rate=1, n_estimators=100, total=   0.2s
[CV] algorithm=SAMME.R, learning_rate=1, n_estimators=100 ............
[CV]  algorithm=SAMME.R, learning_rate=1, n_estimators=100, total=   0.2s


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:   10.3s finished


GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=AdaBoostClassifier(algorithm='SAMME.R',
                                          base_estimator=None,
                                          learning_rate=1.0, n_estimators=50,
                                          random_state=None),
             iid='warn', n_jobs=None,
             param_grid={'algorithm': ['SAMME', 'SAMME.R'],
                         'learning_rate': [0.01, 0.05, 0.1, 0.3, 1],
                         'n_estimators': [50, 100]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=2)

In [30]:
hp_clf.best_params_

{'algorithm': 'SAMME', 'learning_rate': 1, 'n_estimators': 100}

In [31]:
hp_predictions = hp_clf.predict(X_test)

In [32]:
#Original Model
print(confusion_matrix(y_test,predictions))
print('\n')
print(classification_report(y_test,predictions))

[[145  21]
 [ 24  77]]


              precision    recall  f1-score   support

           0       0.86      0.87      0.87       166
           1       0.79      0.76      0.77       101

    accuracy                           0.83       267
   macro avg       0.82      0.82      0.82       267
weighted avg       0.83      0.83      0.83       267



In [33]:
#Hyperparameter Tuned Model
print(confusion_matrix(y_test,hp_predictions))
print('\n')
print(classification_report(y_test,hp_predictions))

[[147  19]
 [ 27  74]]


              precision    recall  f1-score   support

           0       0.84      0.89      0.86       166
           1       0.80      0.73      0.76       101

    accuracy                           0.83       267
   macro avg       0.82      0.81      0.81       267
weighted avg       0.83      0.83      0.83       267

