In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV,KFold,cross_val_score,train_test_split
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error


In [None]:
def LinearWithPenalty(penalty="l2", alpha=0.0001, l1_ratio=0.15, **kw):
    # loss='squared_error' = ordinary least squares
    # penalty in {"l2","l1","elasticnet","none"}
    return make_pipeline(
        StandardScaler(with_mean=True, with_std=True),
        SGDRegressor(loss="squared_error",
                     penalty=penalty,
                     alpha=alpha,        # overall regularization strength
                     l1_ratio=l1_ratio,  # only used if penalty='elasticnet'
                     **kw)
    )

model = LinearWithPenalty(penalty="elasticnet", alpha=1e-3, l1_ratio=0.5, max_iter=2000, random_state=42)


In [33]:
mod = LinearRegression()

In [22]:
gridpred = [
    {
        'sgdregressor__penalty': ['elasticnet'],
        'sgdregressor__alpha': [0.2, 0.5, 0.7],
        'sgdregressor__l1_ratio': [0.1, 0.3, 0.5, 0.8],
    },
    {
        'sgdregressor__penalty': ['l1', 'l2'],
        'sgdregressor__alpha': [0.2, 0.5, 0.7],
    },
    {
        'sgdregressor__penalty': ['l1', 'l2'],
        'sgdregressor__alpha': [0.2, 0.5, 0.7],
        'sgdregressor__l1_ratio': [0],  # though not used for l1/l2
    }
]


In [23]:
from sklearn.datasets import fetch_california_housing
X,Y = fetch_california_housing(return_X_y=True)
     
X.shape
Y.shape

(20640,)

In [24]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=45)

In [25]:
grid = GridSearchCV(model,param_grid=gridpred,n_jobs=-1,error_score='raise')
grid.fit(X_train,Y_train)
best = grid.best_estimator_

In [31]:
star = KFold(n_splits=10,shuffle=True)
cross_val_score(best,X,Y,n_jobs=-1,cv = star,scoring='r2')

array([0.51594041, 0.53175369, 0.53384873, 0.47761794, 0.53190888,
       0.53298196, 0.54970479, 0.52653658, 0.54282233, 0.51122692])

In [38]:
star = KFold(n_splits=10,shuffle=True)
cross_val_score(mod,X,Y,n_jobs=-1,cv = star,scoring='r2')

array([0.6229643 , 0.62118283, 0.55631871, 0.59348606, 0.61577154,
       0.59648541, 0.6003869 , 0.60052245, 0.61557646, 0.60540287])

In [50]:
from sklearn.linear_model import Ridge
mod2 = Ridge(alpha = 40,max_iter=1000,random_state=45)
star = KFold(n_splits=10,shuffle=True)
cross_val_score(mod2,X,Y,n_jobs=-1,cv = star,scoring='r2')

array([0.60056588, 0.58906112, 0.59204153, 0.58674352, 0.63059151,
       0.59013811, 0.61653334, 0.60198871, 0.60365531, 0.61915687])

In [58]:
mod2.fit(X_train,Y_train)

In [69]:
y_pred = mod2.predict(X_test)
r2_score(Y_test,y_pred)

0.6071547729372238

# Bagging

In [137]:
bag = BaggingRegressor(mod2,n_estimators=250,oob_score=True,bootstrap=True)
bag.fit(X_train,Y_train)

In [138]:
bag.oob_score_

0.49491021212368524

In [139]:
y_pred = bag.predict(X_test)
r2_score(Y_test,y_pred)

0.5988576302764493

In [140]:
star = KFold(n_splits=10,shuffle=True)
cross_val_score(bag,X,Y,n_jobs=-1,cv = star,scoring='r2')

array([0.6120529 , 0.61432328, 0.57823894, 0.58173958, 0.54470018,
       0.61648641, 0.59317864, 0.58314558, 0.60747851, 0.59996475])

# Pasting

In [147]:
bag2 = BaggingRegressor(
    estimator=mod2,
    n_estimators=100,
    
    bootstrap=False,
    random_state=42,
    verbose = 1,
    n_jobs=-1
)

In [148]:
bag2.fit(X_train,Y_train)

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   2 out of  20 | elapsed:    0.0s remaining:    0.9s
[Parallel(n_jobs=20)]: Done  20 out of  20 | elapsed:    0.0s finished


In [149]:
y_pred = bag2.predict(X_test)
r2_score(Y_test,y_pred)

[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done   2 out of  20 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=20)]: Done  20 out of  20 | elapsed:    0.0s finished


0.6071547729372242

In [150]:
star = KFold(n_splits=10,shuffle=True)
cross_val_score(bag2,X,Y,n_jobs=-1,cv = star,scoring='r2')

array([0.60457812, 0.57105564, 0.60770169, 0.60635412, 0.59704925,
       0.62349987, 0.62834585, 0.60823569, 0.61354247, 0.57654179])

# Random Spaces

In [163]:
bag3 = BaggingRegressor(
    estimator=mod2,
    n_estimators=100,
   
    bootstrap=False,
    
    bootstrap_features=True,
    random_state=42
)

In [164]:
bag3.fit(X_train,Y_train)

In [165]:
y_pred = bag3.predict(X_test)
r2_score(Y_test,y_pred)

0.5517585396601286

# Random Patches

In [166]:
bag4 = BaggingRegressor(
    estimator=mod2,
    n_estimators=100,
   
    bootstrap=True,
    
    bootstrap_features=True,
    random_state=42
)

In [167]:
bag4.fit(X_train,Y_train)

In [169]:
y_pred = bag4.predict(X_test)
r2_score(Y_test,y_pred)

0.5495285319253913