## Checking the Bagging Ensemble Method

**Classification**

In [1]:
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.metrics import accuracy_score

In [2]:
X=load_wine().data
y=load_wine().target
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=32)

In [3]:
dt=DecisionTreeClassifier()
params={
    "criterion":['gini','entropy'],
    "max_depth":[1,2,3,4,5,6,7,None],
    "min_samples_split":[2,5,10],
    "min_samples_leaf":[1,2,4,8]
}
grid=GridSearchCV(estimator=dt,param_grid=params,cv=5,scoring="accuracy")
grid.fit(X_train,y_train)
print(f"Best params {grid.best_params_}")
print(f"Best score {grid.best_score_}")
dt_model=grid.best_estimator_
y_pred=dt_model.predict(X_test)
acc=accuracy_score(y_test,y_pred)
print("Accuracy : ",acc)


Best params {'criterion': 'entropy', 'max_depth': 4, 'min_samples_leaf': 1, 'min_samples_split': 2}
Best score 0.9226600985221676
Accuracy :  0.9166666666666666


In [4]:
bc=BaggingClassifier(random_state=11)
params={
    "estimator":[DecisionTreeClassifier(criterion= 'entropy', max_depth= 6, min_samples_leaf= 1, min_samples_split= 2 )],
    "n_estimators":[2,5,10,20,30,40,50],
    "max_samples":[0.1,0.25,0.50,0.75],
    "bootstrap":[False,True],
    # "oob_score":[True]
}
grid_bagging=GridSearchCV(estimator=bc,param_grid=params,cv=5,scoring="accuracy")
grid_bagging.fit(X_train,y_train)
print(f"Best params {grid_bagging.best_params_}")
print(f"Best score {grid_bagging.best_score_}")
dt_model=grid_bagging.best_estimator_
y_pred_bagging=dt_model.predict(X_test)
acc=accuracy_score(y_test,y_pred_bagging)
print("Accuracy : ",acc)

Best params {'bootstrap': True, 'estimator': DecisionTreeClassifier(criterion='entropy', max_depth=6), 'max_samples': 0.1, 'n_estimators': 30}
Best score 0.9721674876847292
Accuracy :  0.9444444444444444


**Regression**

In [5]:
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.metrics import r2_score,mean_squared_error
from sklearn.ensemble import BaggingRegressor


In [6]:
df=fetch_california_housing()
X=df.data
y=df.target
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=12)

In [7]:
regression={
    "Linear Regression":LinearRegression(),
    "Ridge Regression":Ridge(),
    "Lasso Regression":Lasso(),
    "Decision Tree Regressor":DecisionTreeRegressor()

}

In [8]:
for name,model in regression.items():
    model.fit(X_train,y_train)
    y_pred=model.predict(X_test)
    r2=r2_score(y_test,y_pred)
    print(f"{name} : {r2}")

Linear Regression : 0.6156635727330093
Ridge Regression : 0.6156526279884726
Lasso Regression : 0.28592974024163786
Decision Tree Regressor : 0.6534205158322561


In [9]:
br=BaggingRegressor(random_state=54)
params=params={
    "estimator":[LinearRegression(),DecisionTreeRegressor(),Ridge(),Lasso()],
    "n_estimators":[2,5,10,20,30,40,50],
    "max_samples":[0.1,0.25,0.50,0.75],
    "bootstrap":[False,True],
    "bootstrap_features":[False,True],

}
grid_regressor=GridSearchCV(estimator=br,param_grid=params,cv=5,n_jobs=-1,scoring="r2")

In [52]:
grid_regressor.fit(X_train,y_train)
print(f"Best params {grid_regressor.best_params_}")
print(f"Best score {grid_regressor.best_score_}")
reg_model=grid_regressor.best_estimator_
y_pred_bagging=reg_model.predict(X_test)
r2=r2_score(y_test,y_pred_bagging)
print("R2 : ",r2)

Best params {'bootstrap': False, 'bootstrap_features': True, 'estimator': DecisionTreeRegressor(), 'max_samples': 0.75, 'n_estimators': 10}
Best score 0.8122784336732923
R2 :  0.8201761486001231


KeyboardInterrupt: 