## Comparing BaggingClassifier using Decision Tree and RandomForest


In [9]:
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
import pandas as pd
import matplotlib.pyplot as plt

In [10]:
X=load_breast_cancer().data
y=load_breast_cancer().target
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=32)

In [11]:
dt=DecisionTreeClassifier()
params={
    "criterion":['gini','entropy'],
    "max_depth":[1,2,3,4,5,6,7,None],
    "min_samples_split":[2,5,10],
    "min_samples_leaf":[1,2,4,8]
}
grid=GridSearchCV(estimator=dt,param_grid=params,cv=5,scoring="accuracy")
grid.fit(X_train,y_train)
print(f"Best params {grid.best_params_}")
print(f"Best score {grid.best_score_}")
dt_model=grid.best_estimator_
y_pred=dt_model.predict(X_test)
acc=accuracy_score(y_test,y_pred)
print("Accuracy : ",acc)


Best params {'criterion': 'gini', 'max_depth': 6, 'min_samples_leaf': 4, 'min_samples_split': 2}
Best score 0.964835164835165
Accuracy :  0.9210526315789473


In [12]:
bc=BaggingClassifier(random_state=11)
params={
    "estimator":[DecisionTreeClassifier(criterion= 'entropy', max_depth= 6, min_samples_leaf= 1, min_samples_split= 2 )],
    "n_estimators":[2,5,10,20,30,40,50],
    "max_samples":[0.1,0.25,0.50,0.75],
    "bootstrap":[False,True],
    
}
grid_bagging=GridSearchCV(estimator=bc,param_grid=params,cv=5,scoring="accuracy")
grid_bagging.fit(X_train,y_train)
print(f"Best params {grid_bagging.best_params_}")
print(f"Best score {grid_bagging.best_score_}")
dt_model=grid_bagging.best_estimator_
y_pred_bagging=dt_model.predict(X_test)
acc=accuracy_score(y_test,y_pred_bagging)
print("Accuracy : ",acc)

Best params {'bootstrap': False, 'estimator': DecisionTreeClassifier(criterion='entropy', max_depth=6), 'max_samples': 0.5, 'n_estimators': 30}
Best score 0.964835164835165
Accuracy :  0.9385964912280702


**Random Forest**

In [13]:
from sklearn.ensemble import RandomForestClassifier

In [14]:
rc=RandomForestClassifier(random_state=56)
rc.fit(X_train,y_train)
y_pred=rc.predict(X_test)
acc=accuracy_score(y_test,y_pred)
print("Accuracy : ",acc)

Accuracy :  0.9473684210526315


In [None]:
params={
    "criterion":['gini','entropy'],
    "max_depth":[1,2,3,4,5,6,7,None],
    "min_samples_split":[2,5,10],
    "min_samples_leaf":[1,2,4,8],
    "max_features":['sqrt','log2'],
    "bootstrap":[True,False],
    "max_samples":[0.25,0.50,0.75]
    }
grid=GridSearchCV(estimator=RandomForestClassifier(),param_grid=params,cv=5,verbose=2,scoring="accuracy")
grid.fit(X_train,y_train)
print(f"Best params {grid.best_params_}")
print(f"Best score {grid.best_score_}")
model=grid.best_estimator_
y_pred=model.predict(X_test)
acc=accuracy_score(y_test,y_pred)
print("Accuracy : ",acc)