In [27]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

In [5]:
# Load the breast-cancer dataset that ships with scikit-learn, then pull out
# the feature matrix (X) and the target vector (y) as separate names.
data = load_breast_cancer()
X = data.data
y = data.target

In [9]:
# Sanity check: show the dimensions of the feature matrix and the target vector.
(X.shape, y.shape)

((569, 30), (569,))

In [13]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [29]:
# Baseline: fit a single decision tree on its own and score it on the held-out set,
# so the ensembles below have a reference point.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train, y_train)
y_pred = tree.predict(X_test)

# Compute both metrics first, then report them.
tree_accuracy = accuracy_score(y_test, y_pred)
tree_report = classification_report(y_test, y_pred)
print('Accuracy score for the base estimator, Random Tree is', tree_accuracy)
print('Classification report for the base estimator, Random Tree is \n', tree_report)

Accuracy score for the base estimator, Random Tree is 0.9473684210526315
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114



## Bagging

In [31]:
# Bagging: train bootstrap-aggregated ensembles of decision trees of increasing
# size (5, 105, ..., 4905 estimators) and score each one on the held-out test set.
for number_estimators in range(5, 5000, 100):
    bag_clf = BaggingClassifier(
        # Passed positionally on purpose: the keyword for this argument was
        # renamed from `base_estimator` to `estimator` in newer scikit-learn
        # releases, and the positional form works on both sides of the rename.
        DecisionTreeClassifier(),
        n_estimators=number_estimators,
        bootstrap=True,
        n_jobs=-1,
        # Same seed as the split and the baseline tree, so the bootstrap samples
        # (and therefore the reported scores) are reproducible across runs.
        random_state=42,
    )
    bag_clf.fit(X_train, y_train)
    y_pred = bag_clf.predict(X_test)
    print(f'Accuracy Score for {number_estimators} estimators is', accuracy_score(y_test, y_pred))
    # This report describes the bagging ensemble, not the lone base tree.
    print(f'Classification report for bagging with {number_estimators} estimators is \n',
          classification_report(y_test, y_pred))

Accuracy Score for 5 estimators is 0.9385964912280702
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       0.91      0.93      0.92        43
           1       0.96      0.94      0.95        71

    accuracy                           0.94       114
   macro avg       0.93      0.94      0.93       114
weighted avg       0.94      0.94      0.94       114

Accuracy Score for 105 estimators is 0.956140350877193
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Accuracy Score for 205 estimators is 0.956140350877193
Classification report for the base estimator, Random Tre

Accuracy Score for 1905 estimators is 0.956140350877193
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Accuracy Score for 2005 estimators is 0.956140350877193
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Accuracy Score for 2105 estimators is 0.956140350877193
Classification report for the base estimator, Random

Accuracy Score for 3805 estimators is 0.956140350877193
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Accuracy Score for 3905 estimators is 0.956140350877193
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Accuracy Score for 4005 estimators is 0.956140350877193
Classification report for the base estimator, Random

## Adaboost

In [32]:
# AdaBoost: boost depth-1 decision trees (stumps) with ensembles of increasing
# size (5, 105, ..., 4905 estimators) and score each one on the held-out test set.
for number_estimators in range(5, 5000, 100):
    ada_clf = AdaBoostClassifier(
        # Passed positionally on purpose: the keyword for this argument was
        # renamed from `base_estimator` to `estimator` in newer scikit-learn
        # releases, and the positional form works on both sides of the rename.
        DecisionTreeClassifier(max_depth=1),
        n_estimators=number_estimators,
        # NOTE(review): 'SAMME.R' is deprecated in recent scikit-learn releases
        # and is expected to be rejected by the newest ones - confirm against the
        # installed version before switching algorithms, since that changes results.
        algorithm='SAMME.R',
        learning_rate=0.5,
        # Same seed as the split and the baseline tree, so repeated runs
        # report the same scores.
        random_state=42,
    )
    ada_clf.fit(X_train, y_train)
    y_pred = ada_clf.predict(X_test)
    print(f'Accuracy Score for {number_estimators} estimators is', accuracy_score(y_test, y_pred))
    # This report describes the boosted ensemble, not the lone base tree.
    print(f'Classification report for AdaBoost with {number_estimators} estimators is \n',
          classification_report(y_test, y_pred))

Accuracy Score for 5 estimators is 0.9649122807017544
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

Accuracy Score for 105 estimators is 0.9649122807017544
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       0.95      0.95      0.95        43
           1       0.97      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.96      0.96       114
weighted avg       0.96      0.96      0.96       114

Accuracy Score for 205 estimators is 0.9736842105263158
Classification report for the base estimator, Random T

Accuracy Score for 1905 estimators is 0.9824561403508771
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

Accuracy Score for 2005 estimators is 0.9824561403508771
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

Accuracy Score for 2105 estimators is 0.9736842105263158
Classification report for the base estimator, Ran

Accuracy Score for 3805 estimators is 0.9824561403508771
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

Accuracy Score for 3905 estimators is 0.9824561403508771
Classification report for the base estimator, Random Tree is 
               precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

Accuracy Score for 4005 estimators is 0.9824561403508771
Classification report for the base estimator, Ran

<li>I tried the Bagging and AdaBoost ensemble methods with DecisionTreeClassifier as the base classifier. </li>
<li>The DecisionTreeClassifier on its own gave an accuracy of 94.7%. </li>
<li>The Bagging and AdaBoost methods are expected to improve on this accuracy as the number of estimators increases. </li>
<li>The Bagging method initially gave an accuracy of about 93.9% with 5 estimators (lower than that of the base estimator itself). It then reached 95.6% with 105 estimators, and the accuracy remained constant as the number of estimators increased up to 4905. </li>
<li>The AdaBoost boosting method was used with the SAMME.R real boosting algorithm and a learning rate of 0.5. The accuracy increased from 96.5% (for 5 estimators) to 97.4% (for 205 estimators) to 98.2% (for 1905 estimators) and later fluctuated around this level as the number of estimators increased. </li>