$$y = X^T b + \varepsilon$$

$$\hat{b} = (X^TX)^{-1}X^Ty$$

$$Loss(X, y) = \frac{1}{n}\sum_{i=1}^n(y_i - x_ib)^2\to\min_{b}$$

# Ridge

$$y = X^T b + \varepsilon$$

$$Loss(X, y) = \frac{1}{n}\sum_{i=1}^n(y_i - x_ib)^2 + \gamma(\sum_{i=1}^Db_i^2)^{0.5}\to\min_{b}$$



$$\hat{b} = (X^TX + \gamma I)^{-1}X^Ty$$

In [1]:
import numpy as np



In [23]:
d = 4
x = np.random.rand(1000, d)
y = np.random.rand(1000)



In [24]:
# lin
b_est = np.linalg.inv(x.T.dot(x)).dot(x.T.dot(y))

In [25]:
# ridge
lam = 0.1
b_est_r = np.linalg.inv(x.T.dot(x) + lam * np.diag([lam] * d)).dot(x.T.dot(y))

In [26]:
b_est, b_est_r

(array([0.25713194, 0.20362417, 0.21904939, 0.2462032 ]),
 array([0.25712691, 0.20362536, 0.21904884, 0.24619908]))

In [27]:
y_pred = x.dot(b_est)
y_pred[:4]

array([0.40189437, 0.46179197, 0.44400504, 0.46401265])

In [28]:
y_pred = x.dot(b_est_r)
y_pred[:4]

array([0.40188777, 0.46178686, 0.44400039, 0.46400678])

In [36]:
from sklearn.model_selection import train_test_split, GridSearchCV

In [75]:
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor

In [63]:
model = GridSearchCV(LinearRegression(), {}, cv=6, n_jobs=8, scoring='neg_root_mean_squared_error')

In [64]:
model.fit(x, y)

GridSearchCV(cv=6, estimator=LinearRegression(), n_jobs=8, param_grid={},
             scoring='neg_root_mean_squared_error')

In [65]:
model.best_score_

-0.2863829596772684

In [69]:
model_r = GridSearchCV(Ridge(), 
                       {'alpha': np.linspace(1e-10, 10, 100)}, 
                       cv=6, n_jobs=8, scoring='neg_root_mean_squared_error')

In [70]:
model_r.fit(x, y)

GridSearchCV(cv=6, estimator=Ridge(), n_jobs=8,
             param_grid={'alpha': array([1.00000000e-10, 1.01010101e-01, 2.02020202e-01, 3.03030303e-01,
       4.04040404e-01, 5.05050505e-01, 6.06060606e-01, 7.07070707e-01,
       8.08080808e-01, 9.09090909e-01, 1.01010101e+00, 1.11111111e+00,
       1.21212121e+00, 1.31313131e+00, 1.41414141e+00, 1.51515152e+00,
       1.61616162e+00, 1.71717172e+00, 1.818181...
       7.67676768e+00, 7.77777778e+00, 7.87878788e+00, 7.97979798e+00,
       8.08080808e+00, 8.18181818e+00, 8.28282828e+00, 8.38383838e+00,
       8.48484848e+00, 8.58585859e+00, 8.68686869e+00, 8.78787879e+00,
       8.88888889e+00, 8.98989899e+00, 9.09090909e+00, 9.19191919e+00,
       9.29292929e+00, 9.39393939e+00, 9.49494949e+00, 9.59595960e+00,
       9.69696970e+00, 9.79797980e+00, 9.89898990e+00, 1.00000000e+01])},
             scoring='neg_root_mean_squared_error')

In [90]:
model_r.best_estimator_

Ridge(alpha=10.0)

In [72]:
model.best_score_ < model_r.best_score_

True

In [86]:
model_r.best_estimator_.alpha

10.0

In [76]:
RandomForestRegressor()

RandomForestRegressor()

In [78]:
model_rr = GridSearchCV(RandomForestRegressor(), 
                       {'n_estimators': [10, 100]}, cv=6, n_jobs=8, scoring='neg_root_mean_squared_error')

In [80]:
model_rr.fit(x, y)

GridSearchCV(cv=6, estimator=RandomForestRegressor(), n_jobs=8,
             param_grid={'n_estimators': [10, 100]},
             scoring='neg_root_mean_squared_error')

In [82]:
model_rr.best_score_

-0.2979972953371644

In [83]:
model.best_score_, model_r.best_score_, model_rr.best_score_

(-0.2863829596772684, -0.2862181422123675, -0.2979972953371644)