# BAGGING

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

%matplotlib inline
warnings.filterwarnings('ignore')

In [2]:
ads = pd.read_csv('Advertising.csv')
ads.sample(3)

Unnamed: 0,Daily Time Spent on Site,Age,Area Income,Daily Internet Usage,Ad Topic Line,City,Male,Country,Timestamp,Clicked on Ad
172,80.23,31,68094.85,196.23,Secured clear-thinking middleware,South Daniellefort,0,Qatar,2016-03-19 14:23:45,0
820,57.51,38,47682.28,105.71,Re-engineered zero-defect open architecture,Jeffreymouth,0,Moldova,2016-03-31 08:53:43,1
466,82.38,35,25603.93,159.6,Polarized analyzing intranet,Port Blake,0,Spain,2016-07-18 01:36:37,1


In [3]:
ads.drop(['Ad Topic Line', 'City', 'Country','Timestamp'], inplace= True, axis= 1)

In [4]:
x = ads[['Daily Time Spent on Site', 'Age', 'Area Income', 'Daily Internet Usage', 'Male']]
y = ads['Clicked on Ad']

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [6]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

$$y = 2\alpha^2 + \beta + \phi - \int\sec$$

In [8]:
from sklearn.ensemble import RandomForestClassifier
rfc_model = RandomForestClassifier(n_estimators = 10)
rfc_model.fit(X_train, y_train)
rfc_preds = rfc_model.predict(X_test)
print(confusion_matrix(y_test, rfc_preds))
print(classification_report(y_test, rfc_preds))

[[97  1]
 [ 8 94]]
              precision    recall  f1-score   support

           0       0.92      0.99      0.96        98
           1       0.99      0.92      0.95       102

    accuracy                           0.95       200
   macro avg       0.96      0.96      0.95       200
weighted avg       0.96      0.95      0.95       200



In [9]:
from sklearn.ensemble import RandomForestClassifier
rfc_model = RandomForestClassifier(n_estimators = 3)
rfc_model.fit(X_train, y_train)
rfc_preds = rfc_model.predict(X_test)
print(confusion_matrix(y_test, rfc_preds))
print(classification_report(y_test, rfc_preds))

[[93  5]
 [ 6 96]]
              precision    recall  f1-score   support

           0       0.94      0.95      0.94        98
           1       0.95      0.94      0.95       102

    accuracy                           0.94       200
   macro avg       0.94      0.95      0.94       200
weighted avg       0.95      0.94      0.95       200



In [10]:
from sklearn.ensemble import RandomForestClassifier
rfc_model = RandomForestClassifier(n_estimators = 5)
rfc_model.fit(X_train, y_train)
rfc_preds = rfc_model.predict(X_test)
print(confusion_matrix(y_test, rfc_preds))
print(classification_report(y_test, rfc_preds))

[[95  3]
 [ 5 97]]
              precision    recall  f1-score   support

           0       0.95      0.97      0.96        98
           1       0.97      0.95      0.96       102

    accuracy                           0.96       200
   macro avg       0.96      0.96      0.96       200
weighted avg       0.96      0.96      0.96       200



In [10]:
from sklearn.ensemble import RandomForestClassifier
rfc_model = RandomForestClassifier(n_estimators = 20)
rfc_model.fit(X_train, y_train)
rfc_preds = rfc_model.predict(X_test)
print(confusion_matrix(y_test, rfc_preds))
print(classification_report(y_test, rfc_preds))

[[106   3]
 [  6  85]]
              precision    recall  f1-score   support

           0       0.95      0.97      0.96       109
           1       0.97      0.93      0.95        91

    accuracy                           0.95       200
   macro avg       0.96      0.95      0.95       200
weighted avg       0.96      0.95      0.95       200



## BOOSTING

In [7]:
from sklearn.ensemble import AdaBoostClassifier
ABC = AdaBoostClassifier(n_estimators = 10)
ABC.fit(X_train, y_train)
ABC_preds = ABC.predict(X_test)
print(confusion_matrix(y_test, ABC_preds))
print(classification_report(y_test, ABC_preds))

[[104   5]
 [  5  86]]
              precision    recall  f1-score   support

           0       0.95      0.95      0.95       109
           1       0.95      0.95      0.95        91

    accuracy                           0.95       200
   macro avg       0.95      0.95      0.95       200
weighted avg       0.95      0.95      0.95       200



In [8]:
from xgboost import XGBRFClassifier
xgb_model = XGBRFClassifier()
xgb_model.fit(X_train, y_train)
xgb_model_preds = xgb_model.predict(X_test)
print(confusion_matrix(y_test, xgb_model_preds))
print(classification_report(y_test, xgb_model_preds))

[[105   4]
 [  5  86]]
              precision    recall  f1-score   support

           0       0.95      0.96      0.96       109
           1       0.96      0.95      0.95        91

    accuracy                           0.95       200
   macro avg       0.96      0.95      0.95       200
weighted avg       0.96      0.95      0.95       200



## VOTING

In [11]:
from sklearn.ensemble import VotingClassifier
vc_model = VotingClassifier(estimators=[('xgb', xgb_model), ('ABC', ABC), ('rf', rfc_model)], voting='hard')
vc_model.fit(X_train, y_train)
vc_pred = vc_model.predict(X_test)
print(confusion_matrix(y_test, vc_pred))
print(classification_report(y_test, vc_pred))

[[107   2]
 [  5  86]]
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       109
           1       0.98      0.95      0.96        91

    accuracy                           0.96       200
   macro avg       0.97      0.96      0.96       200
weighted avg       0.97      0.96      0.96       200



In [12]:
from sklearn.ensemble import VotingClassifier
vc_model = VotingClassifier(estimators=[('xgb', xgb_model), ('ABC', ABC), ('rf', rfc_model)], voting='soft')
vc_model.fit(X_train, y_train)
vc_pred = vc_model.predict(X_test)
print(confusion_matrix(y_test, vc_pred))
print(classification_report(y_test, vc_pred))

[[106   3]
 [  5  86]]
              precision    recall  f1-score   support

           0       0.95      0.97      0.96       109
           1       0.97      0.95      0.96        91

    accuracy                           0.96       200
   macro avg       0.96      0.96      0.96       200
weighted avg       0.96      0.96      0.96       200



## STACKING

In [13]:
from sklearn.ensemble import StackingClassifier
sc_model = StackingClassifier(estimators=[('xgb', xgb_model), ('ABC', ABC), ('rf', rfc_model)])
sc_model.fit(X_train, y_train)
sc_pred = sc_model.predict(X_test)
print(confusion_matrix(y_test, sc_pred))
print(classification_report(y_test, sc_pred))

[[106   3]
 [  5  86]]
              precision    recall  f1-score   support

           0       0.95      0.97      0.96       109
           1       0.97      0.95      0.96        91

    accuracy                           0.96       200
   macro avg       0.96      0.96      0.96       200
weighted avg       0.96      0.96      0.96       200

