# Bagging Regressor - Housing Dataset

In [7]:
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing()
print("Feature names:", data.feature_names)
print("Data size:", data.data.shape)
print("Target size:", data.target.shape)

Feature names: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
Data size: (20640, 8)
Target size: (20640,)


### Regressior models : LR, KNN, DT

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

In [18]:
lr = LinearRegression()
knn = KNeighborsRegressor()
dt = DecisionTreeRegressor()

In [13]:
X = data.data
y = data.target

In [14]:
X

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]])

In [15]:
y

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [16]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=42)

In [17]:
X_train.shape

(15480, 8)

In [19]:
lr.fit(X_train, y_train)
knn.fit(X_train, y_train)
dt.fit(X_train, y_train)

In [20]:
y_pred_lr = lr.predict(X_test)
y_pred_knn = knn.predict(X_test)
y_pred_dt = dt.predict(X_test)

In [21]:
from sklearn.metrics import r2_score

print("R2 score of LR : ", r2_score(y_pred_lr, y_test))
print("R2 score of KNN : ", r2_score(y_pred_knn, y_test))
print("R2 score of DT : ", r2_score(y_pred_dt, y_test))

R2 score of LR :  0.34413518487144745
R2 score of KNN :  -2.6101589344308467
R2 score of DT :  0.6048589127106381


### Bagging Regressor with Linear Regression

In [22]:
from sklearn.ensemble import BaggingRegressor

In [25]:
bag_lr = BaggingRegressor(
            estimator=LinearRegression(),
            n_estimators=100,
            max_samples=0.5,
            bootstrap=True,
            n_jobs=-1,
            oob_score=True,
            random_state=42,  
)

bag_lr.fit(X_train, y_train)

In [26]:
y_pred1 = bag_lr.predict(X_test)

In [27]:
r2_score(y_pred1, y_test)

0.3601684273652658

### Bagging Regressor with KNN

In [28]:
bag_knn = BaggingRegressor(
            estimator=KNeighborsRegressor(),
            n_estimators=100,
            max_samples=0.5,
            bootstrap=True,
            n_jobs=-1,
            oob_score=True,
            random_state=42,  
)

bag_knn.fit(X_train, y_train)

In [32]:
y_pred2 = bag_knn.predict(X_test)

In [33]:
r2_score(y_pred2, y_test)

-6.244567536259528

### Bagging Regressor with Decision Tree

In [34]:
bag_dt = BaggingRegressor(
            estimator=DecisionTreeRegressor(),
            n_estimators=100,
            max_samples=0.5,
            bootstrap=True,
            n_jobs=-1,
            oob_score=True,
            random_state=42,  
)

bag_dt.fit(X_train, y_train)

In [35]:
y_pred3 = bag_dt.predict(X_test)

In [36]:
r2_score(y_pred3, y_test)

0.7340430575725061

### Bagging Regressor with Grid Search CV

In [45]:
from sklearn.model_selection import GridSearchCV

params = {
    'estimator' : [LinearRegression(), KNeighborsRegressor(), DecisionTreeRegressor()],
    'n_estimators' : [50, 100, 500],
    'max_samples' : [.25,.5],
    'bootstrap' : [True,False],
    'max_features' : [.25,.5],
    'bootstrap_features' : [True,False],
}

bag_gridcv = GridSearchCV(estimator=BaggingRegressor(random_state=42, verbose=1), param_grid=params, cv=3, n_jobs=4, verbose=1)
bag_gridcv.fit(X_train, y_train)

Fitting 3 folds for each of 144 candidates, totalling 432 fits


KeyboardInterrupt: 