# <center>Ensemble Machine Learning</center>

Getting Ready...

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = sns.load_dataset('titanic')

In [4]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


***
## Data - Preprocessing

In [5]:
df = df[['survived','pclass','age','parch','fare','adult_male']]

In [6]:
df.dropna(inplace = True)

In [7]:
y = df['survived']
x = df[['pclass','age','parch','fare','adult_male']]

In [8]:
x.head()

Unnamed: 0,pclass,age,parch,fare,adult_male
0,3,22.0,0,7.25,True
1,1,38.0,0,71.2833,False
2,3,26.0,0,7.925,False
3,1,35.0,0,53.1,False
4,3,35.0,0,8.05,True


In [42]:
len(y)

714

***
## Model Fitting

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,random_state=42)

In [11]:
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [12]:
def print_score(clf, x_train, y_train, x_test, y_test, train=True):
    '''
    print the accuracy score, classification report and confusion matrix of classifier
    '''
    if train:
        '''
        training performance
        '''
        print("Train Result:\n")
        print("accuracy score: {0:.4f}\n".format(accuracy_score(y_train, clf.predict(x_train))))
        print("Classification Report: \n {}\n".format(classification_report(y_train, clf.predict(x_train))))
        print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_train, clf.predict(x_train))))

        res = cross_val_score(clf, x_train, y_train, cv=10, scoring='accuracy')
        print("Average Accuracy: \t {0:.4f}".format(np.mean(res)))
        print("Accuracy SD: \t\t {0:.4f}".format(np.std(res)))
        
    elif train==False:
        '''
        test performance
        '''
        print("Test Result:\n")        
        print("accuracy score: {0:.4f}\n".format(accuracy_score(y_test, clf.predict(x_test))))
        print("Classification Report: \n {}\n".format(classification_report(y_test, clf.predict(x_test))))
        print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_test, clf.predict(x_test))))    

In [227]:
__w = dict(round(y.value_counts()/len(y),4))

In [228]:
__w

{0: 0.5938, 1: 0.4062}

### Decision Tree

In [13]:
from sklearn.tree import DecisionTreeClassifier

In [229]:
dt_clf = DecisionTreeClassifier(min_samples_leaf=2,random_state=42,class_weight=__w)

In [230]:
dt_clf.fit(x_train,y_train)

DecisionTreeClassifier(class_weight={0: 0.5938, 1: 0.4062}, criterion='gini',
            max_depth=None, max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=2, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=42,
            splitter='best')

In [231]:
print_score(dt_clf, x_train, y_train, x_test, y_test) #train = true

Train Result:

accuracy score: 0.9279

Classification Report: 
               precision    recall  f1-score   support

           0       0.91      0.98      0.94       298
           1       0.96      0.86      0.91       201

   micro avg       0.93      0.93      0.93       499
   macro avg       0.94      0.92      0.92       499
weighted avg       0.93      0.93      0.93       499


Confusion Matrix: 
 [[291   7]
 [ 29 172]]

Average Accuracy: 	 0.7857
Accuracy SD: 		 0.0571


In [232]:
print_score(dt_clf, x_train, y_train, x_test, y_test, False) #train = false

Test Result:

accuracy score: 0.7767

Classification Report: 
               precision    recall  f1-score   support

           0       0.78      0.86      0.82       126
           1       0.77      0.66      0.71        89

   micro avg       0.78      0.78      0.78       215
   macro avg       0.77      0.76      0.76       215
weighted avg       0.78      0.78      0.77       215


Confusion Matrix: 
 [[108  18]
 [ 30  59]]



***

# Bootstrap Aggregation / Bagging

In [233]:
from sklearn.ensemble import BaggingClassifier

##### Out-Of-Bag Score = True

In [234]:
bag_clf = BaggingClassifier(base_estimator=dt_clf,
                           n_estimators=1000,
                           bootstrap=True,n_jobs=-1,random_state=42,oob_score=True)

In [235]:
bag_clf.fit(x_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight={0: 0.5938, 1: 0.4062}, criterion='gini',
            max_depth=None, max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=2, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=42,
            splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=1000, n_jobs=-1, oob_score=True,
         random_state=42, verbose=0, warm_start=False)

In [236]:
print_score(bag_clf,x_train, y_train, x_test, y_test) #train = True

Train Result:

accuracy score: 0.9218

Classification Report: 
               precision    recall  f1-score   support

           0       0.91      0.97      0.94       298
           1       0.95      0.85      0.90       201

   micro avg       0.92      0.92      0.92       499
   macro avg       0.93      0.91      0.92       499
weighted avg       0.92      0.92      0.92       499


Confusion Matrix: 
 [[289   9]
 [ 30 171]]

Average Accuracy: 	 0.8236
Accuracy SD: 		 0.0583


In [237]:
bag_clf.oob_score_

0.8216432865731463

In [238]:
print_score(bag_clf,x_train, y_train, x_test, y_test, train=False)

Test Result:

accuracy score: 0.8093

Classification Report: 
               precision    recall  f1-score   support

           0       0.81      0.89      0.85       126
           1       0.82      0.70      0.75        89

   micro avg       0.81      0.81      0.81       215
   macro avg       0.81      0.79      0.80       215
weighted avg       0.81      0.81      0.81       215


Confusion Matrix: 
 [[112  14]
 [ 27  62]]



***

##  Random Forest

In [24]:
from sklearn.ensemble import RandomForestClassifier

In [239]:
rf_clf= RandomForestClassifier(oob_score=True,random_state=42,class_weight= __w)

In [240]:
rf_clf.fit(x_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight={0: 0.5938, 1: 0.4062},
            criterion='gini', max_depth=None, max_features='auto',
            max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=None, oob_score=True, random_state=42,
            verbose=0, warm_start=False)

In [241]:
print_score(rf_clf, x_train, y_train, x_test, y_test) #train = true

Train Result:

accuracy score: 0.9559

Classification Report: 
               precision    recall  f1-score   support

           0       0.95      0.98      0.96       298
           1       0.97      0.92      0.94       201

   micro avg       0.96      0.96      0.96       499
   macro avg       0.96      0.95      0.95       499
weighted avg       0.96      0.96      0.96       499


Confusion Matrix: 
 [[292   6]
 [ 16 185]]

Average Accuracy: 	 0.8177
Accuracy SD: 		 0.0564


In [242]:
rf_clf.oob_score_

0.781563126252505

In [243]:
print_score(rf_clf, x_train, y_train, x_test, y_test, False) #train = false

Test Result:

accuracy score: 0.8000

Classification Report: 
               precision    recall  f1-score   support

           0       0.80      0.87      0.84       126
           1       0.79      0.70      0.74        89

   micro avg       0.80      0.80      0.80       215
   macro avg       0.80      0.78      0.79       215
weighted avg       0.80      0.80      0.80       215


Confusion Matrix: 
 [[110  16]
 [ 27  62]]



### Grid Search

In [30]:
from sklearn.pipeline import Pipeline

In [31]:
from sklearn.model_selection import GridSearchCV

In [32]:
rf_clf = RandomForestClassifier(random_state=42)

In [33]:
params_grid = {  'max_depth':[3,None], 'min_samples_leaf': [3,5,10],
            'min_samples_split':[2,3,10], 'bootstrap':[True,False],
              'criterion': ['gini','entropy']}

In [34]:
grid_search = GridSearchCV(rf_clf, params_grid,n_jobs=-1,cv =5 , verbose =1,scoring= 'accuracy')

In [35]:
grid_search.fit(x_train,y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  54 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.9s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
            oob_score=False, random_state=42, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'max_depth': [3, None], 'min_samples_leaf': [3, 5, 10], 'min_samples_split': [2, 3, 10], 'bootstrap': [True, False], 'criterion': ['gini', 'entropy']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=1)

In [36]:
grid_search.best_score_

0.8476953907815631

In [37]:
grid_search.best_estimator_.get_params()

{'bootstrap': True,
 'class_weight': None,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 3,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 10,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 42,
 'verbose': 0,
 'warm_start': False}

In [38]:
print_score(grid_search, x_train, y_train, x_test, y_test) #train = true

Train Result:

accuracy score: 0.8918

Classification Report: 
               precision    recall  f1-score   support

           0       0.88      0.95      0.91       298
           1       0.92      0.80      0.86       201

   micro avg       0.89      0.89      0.89       499
   macro avg       0.90      0.88      0.88       499
weighted avg       0.89      0.89      0.89       499


Confusion Matrix: 
 [[284  14]
 [ 40 161]]

Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 212 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    6.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done 212 tasks      | elapsed:    4.3s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    2.8s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    6.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    6.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    7.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    6.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    6.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done 212 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    6.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    1.4s


Average Accuracy: 	 0.8318
Accuracy SD: 		 0.0650


[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed:    6.6s finished


In [39]:
print_score(grid_search, x_train, y_train, x_test, y_test, False) #train = false

Test Result:

accuracy score: 0.8140

Classification Report: 
               precision    recall  f1-score   support

           0       0.81      0.89      0.85       126
           1       0.82      0.71      0.76        89

   micro avg       0.81      0.81      0.81       215
   macro avg       0.81      0.80      0.80       215
weighted avg       0.81      0.81      0.81       215


Confusion Matrix: 
 [[112  14]
 [ 26  63]]



***
## Extra Trees

In [43]:
from sklearn.ensemble import ExtraTreesClassifier

In [244]:
xt_clf= ExtraTreesClassifier(random_state=42,min_samples_leaf=5,class_weight=__w)

In [245]:
xt_clf.fit(x_train,y_train)

ExtraTreesClassifier(bootstrap=False, class_weight={0: 0.5938, 1: 0.4062},
           criterion='gini', max_depth=None, max_features='auto',
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=5,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=None, oob_score=False, random_state=42,
           verbose=0, warm_start=False)

In [246]:
print_score(xt_clf, x_train, y_train, x_test, y_test,True) 

Train Result:

accuracy score: 0.8377

Classification Report: 
               precision    recall  f1-score   support

           0       0.80      0.98      0.88       298
           1       0.95      0.63      0.76       201

   micro avg       0.84      0.84      0.84       499
   macro avg       0.87      0.80      0.82       499
weighted avg       0.86      0.84      0.83       499


Confusion Matrix: 
 [[291   7]
 [ 74 127]]

Average Accuracy: 	 0.8178
Accuracy SD: 		 0.0515


In [247]:
print_score(xt_clf, x_train, y_train, x_test, y_test, False) #train = false

Test Result:

accuracy score: 0.8047

Classification Report: 
               precision    recall  f1-score   support

           0       0.77      0.95      0.85       126
           1       0.90      0.60      0.72        89

   micro avg       0.80      0.80      0.80       215
   macro avg       0.83      0.77      0.78       215
weighted avg       0.82      0.80      0.80       215


Confusion Matrix: 
 [[120   6]
 [ 36  53]]



***

# Boosting (Hypothesis Boosting)

## AdaBoost (with Random Forest)

In [48]:
from sklearn.ensemble import AdaBoostClassifier

In [49]:
ada_clf= AdaBoostClassifier(RandomForestClassifier(n_estimators=100))

In [50]:
ada_clf.fit(x_train,y_train)

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
          learning_rate=1.0, n_estimators=50, random_state=None)

In [51]:
print_score(ada_clf, x_train, y_train, x_test, y_test,True) 

Train Result:

accuracy score: 0.9920

Classification Report: 
               precision    recall  f1-score   support

           0       0.99      1.00      0.99       298
           1       0.99      0.99      0.99       201

   micro avg       0.99      0.99      0.99       499
   macro avg       0.99      0.99      0.99       499
weighted avg       0.99      0.99      0.99       499


Confusion Matrix: 
 [[297   1]
 [  3 198]]

Average Accuracy: 	 0.8017
Accuracy SD: 		 0.0484


In [52]:
print_score(ada_clf, x_train, y_train, x_test, y_test, False) #train = false

Test Result:

accuracy score: 0.8000

Classification Report: 
               precision    recall  f1-score   support

           0       0.81      0.86      0.83       126
           1       0.78      0.72      0.75        89

   micro avg       0.80      0.80      0.80       215
   macro avg       0.80      0.79      0.79       215
weighted avg       0.80      0.80      0.80       215


Confusion Matrix: 
 [[108  18]
 [ 25  64]]



## Gradient Boosting Machine

In [53]:
from sklearn.ensemble import GradientBoostingClassifier

In [54]:
grad_clf= GradientBoostingClassifier()

In [55]:
grad_clf.fit(x_train,y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              n_iter_no_change=None, presort='auto', random_state=None,
              subsample=1.0, tol=0.0001, validation_fraction=0.1,
              verbose=0, warm_start=False)

In [56]:
print_score(grad_clf, x_train, y_train, x_test, y_test,True) 

Train Result:

accuracy score: 0.9259

Classification Report: 
               precision    recall  f1-score   support

           0       0.92      0.96      0.94       298
           1       0.94      0.87      0.90       201

   micro avg       0.93      0.93      0.93       499
   macro avg       0.93      0.92      0.92       499
weighted avg       0.93      0.93      0.93       499


Confusion Matrix: 
 [[287  11]
 [ 26 175]]

Average Accuracy: 	 0.8198
Accuracy SD: 		 0.0469


In [57]:
print_score(grad_clf, x_train, y_train, x_test, y_test, False) #train = false

Test Result:

accuracy score: 0.8047

Classification Report: 
               precision    recall  f1-score   support

           0       0.82      0.86      0.84       126
           1       0.78      0.73      0.76        89

   micro avg       0.80      0.80      0.80       215
   macro avg       0.80      0.79      0.80       215
weighted avg       0.80      0.80      0.80       215


Confusion Matrix: 
 [[108  18]
 [ 24  65]]



## eXtreme Gradient Boosting 

In [58]:
import xgboost as xgb

In [102]:
xgb_clf= xgb.XGBClassifier(max_depth=5,n_estimators=1000,learning_rate=0.2)

In [103]:
xgb_clf.fit(x_train,y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.2, max_delta_step=0,
       max_depth=5, min_child_weight=1, missing=None, n_estimators=1000,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [104]:
print_score(xgb_clf, x_train, y_train, x_test, y_test,True) 

Train Result:

accuracy score: 0.9880

Classification Report: 
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       298
           1       0.99      0.98      0.98       201

   micro avg       0.99      0.99      0.99       499
   macro avg       0.99      0.99      0.99       499
weighted avg       0.99      0.99      0.99       499


Confusion Matrix: 
 [[296   2]
 [  4 197]]

Average Accuracy: 	 0.7996
Accuracy SD: 		 0.0499


In [105]:
print_score(xgb_clf, x_train, y_train, x_test, y_test, False) #train = false

Test Result:

accuracy score: 0.7860

Classification Report: 
               precision    recall  f1-score   support

           0       0.82      0.82      0.82       126
           1       0.74      0.74      0.74        89

   micro avg       0.79      0.79      0.79       215
   macro avg       0.78      0.78      0.78       215
weighted avg       0.79      0.79      0.79       215


Confusion Matrix: 
 [[103  23]
 [ 23  66]]



***

# Ensemble of Trees

In [261]:
en_tree_train = pd.concat([pd.DataFrame(dt_clf.predict_proba(x_train))[1],
                    pd.DataFrame(rf_clf.predict_proba(x_train))[1],
                    pd.DataFrame(xt_clf.predict_proba(x_train))[1]],axis=1)

In [262]:
en_tree_train.columns = ['dt','rf','xt']

In [268]:
meta_clf = GradientBoostingClassifier()

In [269]:
meta_clf.fit(en_tree_train,y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              n_iter_no_change=None, presort='auto', random_state=None,
              subsample=1.0, tol=0.0001, validation_fraction=0.1,
              verbose=0, warm_start=False)

In [252]:
en_tree_test = pd.concat([pd.DataFrame(dt_clf.predict_proba(x_test))[1],
                    pd.DataFrame(rf_clf.predict_proba(x_test))[1],
                    pd.DataFrame(xt_clf.predict_proba(x_test))[1]],axis=1)

In [253]:
en_tree_test.columns = ['dt','rf','xt']

In [254]:
print_score(meta_clf, en_tree_train, y_train, en_tree_test, y_test)

Train Result:

accuracy score: 0.9900

Classification Report: 
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       298
           1       0.99      0.99      0.99       201

   micro avg       0.99      0.99      0.99       499
   macro avg       0.99      0.99      0.99       499
weighted avg       0.99      0.99      0.99       499


Confusion Matrix: 
 [[296   2]
 [  3 198]]

Average Accuracy: 	 0.9660
Accuracy SD: 		 0.0178


In [255]:
print_score(meta_clf, en_tree_train, y_train, en_tree_test, y_test,False)

Test Result:

accuracy score: 0.7860

Classification Report: 
               precision    recall  f1-score   support

           0       0.81      0.83      0.82       126
           1       0.75      0.72      0.74        89

   micro avg       0.79      0.79      0.79       215
   macro avg       0.78      0.78      0.78       215
weighted avg       0.79      0.79      0.79       215


Confusion Matrix: 
 [[105  21]
 [ 25  64]]



***

In [270]:
en_boost_train = pd.concat([pd.DataFrame(ada_clf.predict_proba(x_train))[1],
                    pd.DataFrame(grad_clf.predict_proba(x_train))[1],
                    pd.DataFrame(xgb_clf.predict_proba(x_train))[1]],axis=1)

In [257]:
en_boost_train.columns = ['ada','grad','xgb']

In [258]:
from sklearn.linear_model import LogisticRegression

In [271]:
meta_reg = LogisticRegression(fit_intercept=False)

In [272]:
meta_reg.fit(en_boost_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=False,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [273]:
en_boost_test = pd.concat([pd.DataFrame(ada_clf.predict_proba(x_test))[1],
                    pd.DataFrame(grad_clf.predict_proba(x_test))[1],
                    pd.DataFrame(xgb_clf.predict_proba(x_test))[1]],axis=1)

In [274]:
en_boost_test.columns = ['ada','grad','xgb']

In [275]:
print_score(meta_reg, en_boost_train, y_train, en_boost_test, y_test)

Train Result:

accuracy score: 0.9800

Classification Report: 
               precision    recall  f1-score   support

           0       0.99      0.97      0.98       298
           1       0.96      0.99      0.98       201

   micro avg       0.98      0.98      0.98       499
   macro avg       0.98      0.98      0.98       499
weighted avg       0.98      0.98      0.98       499


Confusion Matrix: 
 [[290   8]
 [  2 199]]

Average Accuracy: 	 0.9800
Accuracy SD: 		 0.0217


In [276]:
print_score(meta_reg, en_boost_train, y_train, en_boost_test, y_test,False)

Test Result:

accuracy score: 0.7581

Classification Report: 
               precision    recall  f1-score   support

           0       0.81      0.76      0.79       126
           1       0.69      0.75      0.72        89

   micro avg       0.76      0.76      0.76       215
   macro avg       0.75      0.76      0.75       215
weighted avg       0.76      0.76      0.76       215


Confusion Matrix: 
 [[96 30]
 [22 67]]

