In [None]:
'''
Bagging classifier:

A Bagging classifier is an ensemble meta-estimator that fits base classifiers, each on a random subset of the original dataset,
and then aggregates their individual predictions (either by voting or by averaging) to form a final prediction.

Bagging reduces overfitting (variance) by averaging or voting. This can slightly increase bias, but that increase is
typically outweighed by the reduction in variance.
'''

In [None]:
'''
How the Bagging Classifier Works

    - Bagging resamples the original training dataset with replacement, so some instances (data points) may appear multiple times
      while others are left out entirely.
    
    Original training dataset: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10

    Resampled training set 1: 2, 3, 3, 5, 6, 1, 8, 10, 9, 1
    Resampled training set 2: 1, 1, 5, 6, 3, 8, 9, 10, 2, 7
    Resampled training set 3: 1, 5, 8, 9, 2, 10, 9, 7, 5, 4
    

    Classifier generation:

    Let N be the size of the training set.
    for each of t iterations:
        sample N instances with replacement from the original training set.
        apply the learning algorithm to the sample.
        store the resulting classifier.

    Classification:
    for each of the t classifiers:
        predict class of instance using classifier.
    return the class that was predicted most often (majority vote).
'''

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [7]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn import model_selection

# Load the dataset. Passing `names` makes pandas treat the file as header-less
# and label the nine columns explicitly.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = pd.read_csv("pima-indians-diabetes.csv", names=names)
array = data.values
X = array[:, 0:8]  # first eight columns: the features
Y = array[:, 8]    # ninth column ('class'): the target

seed = 8

# Plain, unshuffled 3-fold split. `random_state` is intentionally NOT passed:
# without shuffle=True it has no effect on the folds, and scikit-learn >= 0.24
# raises a ValueError for that combination. The folds are identical to what
# the earlier `KFold(n_splits=3, random_state=seed)` call produced.
kfold = model_selection.KFold(n_splits=3)

# Base learner that each bagged ensemble member is built from.
base_cls = DecisionTreeClassifier()

# Number of base classifiers in the ensemble.
num_trees = 500

# Bagging classifier. The base learner is passed positionally because its
# keyword was renamed from `base_estimator` to `estimator` in scikit-learn 1.2
# (old name removed in 1.4); the positional form works with both.
model = BaggingClassifier(base_cls, n_estimators=num_trees, random_state=seed)

# Mean score across the three folds (the classifier's default scorer).
results = model_selection.cross_val_score(model, X, Y, cv=kfold)
print("accuracy :",results.mean())

accuracy : 0.765625

