In [1]:
# importing dataset 
from sklearn.datasets import make_classification

from sklearn.metrics import accuracy_score

# Ensembel Learning 
from sklearn.ensemble import BaggingClassifier

from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split

In [2]:
X,Y= make_classification(n_samples= 10000, n_features= 10,n_informative= 3)

In [3]:
X_train,X_test,Y_train,Y_test= train_test_split(X,Y, test_size=0.2, random_state= 42)

In [4]:
dt= DecisionTreeClassifier(random_state=42)
dt.fit(X_train,Y_train)
Y_pred= dt.predict(X_test)

print("Decision Tree accuracy",accuracy_score(Y_test,Y_pred)*100)

Decision Tree accuracy 93.25


## Bagging with Decision Tree 
 - recommended

In [5]:
bag= BaggingClassifier(
    base_estimator= DecisionTreeClassifier(),
    n_estimators= 500,
    max_samples= 0.5,
    bootstrap= True,
    random_state= 42,
    n_jobs= -1
)

In [6]:
bag.fit(X_train,Y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.5,
                  n_estimators=500, n_jobs=-1, random_state=42)

In [7]:
Y_pred= bag.predict(X_test)
print("Bagging with 500 Decision Tree  accuracy score ",accuracy_score(Y_test,Y_pred)*100)

Bagging with 500 Decision Tree  accuracy score  95.35


## Bagging with SVM
 - not recommended

In [None]:
bag = BaggingClassifier(
    base_estimator=SVC(),
    n_estimators=500,
    max_samples=0.25,
    bootstrap=True,
    random_state=42,
    n_jobs= -1
)

In [8]:
bag.fit(X_train,Y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.5,
                  n_estimators=500, n_jobs=-1, random_state=42)

In [9]:
Y_pred= bag.predict(X_test)
print("Bagging with 500 SVM accuracy score ",accuracy_score(Y_test,Y_pred)*100)

Bagging with 500 SVM accuracy score  95.35


## Applying GridSearchCV
  - to finding optimal hyperparameter 

In [10]:
from sklearn.model_selection import GridSearchCV

In [11]:
# Multiple for loop checking for each parameter 
parameters = { 
    'n_estimators': [50, 100, 500], 
    'max_samples': [0.1,0.4, 0.5, 0.7,1.0],
    'bootstrap' : [True,False],
    'max_features' : [0.4,0.7,1.0],
    'bootstrap_features': [True, False],
}
    

In [12]:
search= GridSearchCV(BaggingClassifier(), parameters, cv=5, n_jobs= -1)

In [13]:
search.fit(X_train,Y_train)

GridSearchCV(cv=5, estimator=BaggingClassifier(), n_jobs=-1,
             param_grid={'bootstrap': [True, False],
                         'bootstrap_features': [True, False],
                         'max_features': [0.4, 0.7, 1.0],
                         'max_samples': [0.1, 0.4, 0.5, 0.7, 1.0],
                         'n_estimators': [50, 100, 500]})

In [14]:
search.best_score_ * 100

95.26249999999999

In [15]:
search.best_params_

{'bootstrap': True,
 'bootstrap_features': False,
 'max_features': 0.7,
 'max_samples': 1.0,
 'n_estimators': 500}