# Load Dataset

In [2]:
from sklearn import datasets

In [9]:
digit_df = datasets.load_digits()
digit_df

{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ..., 10.,  0.,  0.],
        [ 0.,  0.,  0., ..., 16.,  9.,  0.],
        ...,
        [ 0.,  0.,  1., ...,  6.,  0.,  0.],
        [ 0.,  0.,  2., ..., 12.,  0.,  0.],
        [ 0.,  0., 10., ..., 12.,  1.,  0.]], shape=(1797, 64)),
 'target': array([0, 1, 2, ..., 8, 9, 8], shape=(1797,)),
 'frame': None,
 'feature_names': ['pixel_0_0',
  'pixel_0_1',
  'pixel_0_2',
  'pixel_0_3',
  'pixel_0_4',
  'pixel_0_5',
  'pixel_0_6',
  'pixel_0_7',
  'pixel_1_0',
  'pixel_1_1',
  'pixel_1_2',
  'pixel_1_3',
  'pixel_1_4',
  'pixel_1_5',
  'pixel_1_6',
  'pixel_1_7',
  'pixel_2_0',
  'pixel_2_1',
  'pixel_2_2',
  'pixel_2_3',
  'pixel_2_4',
  'pixel_2_5',
  'pixel_2_6',
  'pixel_2_7',
  'pixel_3_0',
  'pixel_3_1',
  'pixel_3_2',
  'pixel_3_3',
  'pixel_3_4',
  'pixel_3_5',
  'pixel_3_6',
  'pixel_3_7',
  'pixel_4_0',
  'pixel_4_1',
  'pixel_4_2',
  'pixel_4_3',
  'pixel_4_4',
  'pixel_4_5',
  'pixel_4_6',
  'pixel_4_7'

In [10]:
X, y = datasets.load_digits(return_X_y=True)

In [11]:
print(X, "\n")
print(y)

[[ 0.  0.  5. ...  0.  0.  0.]
 [ 0.  0.  0. ... 10.  0.  0.]
 [ 0.  0.  0. ... 16.  9.  0.]
 ...
 [ 0.  0.  1. ...  6.  0.  0.]
 [ 0.  0.  2. ... 12.  0.  0.]
 [ 0.  0. 10. ... 12.  1.  0.]] 

[0 1 2 ... 8 9 8]


# import libraries

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=20, random_state=123)
print(f"Train/Test Size : {X_train.shape}, {X_test.shape}, {y_train.shape}, {y_test.shape}")

Train/Test Size : (1777, 64), (20, 64), (1777,), (20,)


In [17]:
# Create an object for each model
lr = LinearRegression()
dt = DecisionTreeRegressor()
knn = KNeighborsRegressor()

In [18]:
# train the data
lr.fit(X_train, y_train)
dt.fit(X_train, y_train)
knn.fit(X_train, y_train)

In [None]:
# Predict 
y_pred_lr =  lr.predict(X_test)
y_pred_dt = dt.predict(X_test)
y_pred_knn = knn.predict(X_test)

In [24]:
print(f"R^2 Score of LR :{r2_score(y_test, y_pred_lr)}")
print(f"R^2 Score of DT :{r2_score(y_test, y_pred_dt)}")
print(f"R^2 Score of KNN : {r2_score(y_test, y_pred_knn)}")


R^2 Score of LR :-0.000640052192592222
R^2 Score of DT :0.17004048582995956
R^2 Score of KNN : 1.0


# Import bagging regressor

In [26]:
from sklearn.ensemble import BaggingRegressor

bagging_regressor = BaggingRegressor(random_state=1)
bagging_regressor.fit(X_train, y_train)

In [27]:
import numpy as np

In [33]:
Y_preds = bagging_regressor.predict(X_test)

print('Training Coefficient of R^2 : %.3f'%bagging_regressor.score(X_train, y_train))
print('Test Coefficient of R^2 : %.3f'%bagging_regressor.score(X_test, y_test))

Training Coefficient of R^2 : 0.965
Test Coefficient of R^2 : 0.641


In [40]:
y_pred_bag_r = bagging_regressor.predict(X_test)

print(f"Training Coeff. of R^2 : {np.round(bagging_regressor.score(X_train, y_train), 3)}. \n")
print(f"Test Coeff. of R^2 : {np.round(bagging_regressor.score(X_test, y_test), 3)}")

Training Coeff. of R^2 : 0.965. 

Test Coeff. of R^2 : 0.641


# Grid Search CV

In [42]:
from sklearn.model_selection import GridSearchCV

In [None]:
%%time

n_samples = digit_df.data.shape[0]
n_features = digit_df.data.shape[1]

params = {'estimator': [None, LinearRegression(), KNeighborsRegressor()],
          'n_estimators': [20,50,100],
          'max_samples': [0.5,1.0],
          'max_features': [0.5,1.0],
          'bootstrap': [True, False],
          'bootstrap_features': [True, False]}

bagging_regressor_grid = GridSearchCV(BaggingRegressor(random_state=1, n_jobs=-1), param_grid =params, cv=3, n_jobs=-1, verbose=1)
bagging_regressor_grid.fit(X_train, y_train)

print('Train R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(X_train, Y_train))
print("--"*20)

print('Test R^2 Score : %.3f'%bagging_regressor_grid.best_estimator_.score(X_test, Y_test))
print("--"*20)

print('Best R^2 Score Through Grid Search : %.3f'%bagging_regressor_grid.best_score_)
print("--"*20)

print('Best Parameters : ',bagging_regressor_grid.best_params_)
print("--"*20)

Fitting 3 folds for each of 144 candidates, totalling 432 fits


- 20:46 IST
- 18-06-2025