<a href="https://www.kaggle.com/code/raneemabdo/ridge-regression-with-sklearn?scriptVersionId=143623379" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import Ridge


In [None]:
 data_url = "http://lib.stat.cmu.edu/datasets/boston"
 raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
 data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
 target = raw_df.values[1::2, 2]
df=pd.DataFrame(data,columns=['CRIM','ZN','INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT'])
df['target']=target

**Comparing Ridge and linear regression**

In [None]:
# Ridge estimator constructed with no arguments, i.e. the library defaults.
r=Ridge()

In [None]:
# Fit the default Ridge model on the full feature matrix and target.
r.fit(data,target)

In [None]:
# Intercept learned by the default Ridge fit.
r.intercept_

In [None]:
# Coefficients learned by the default Ridge fit.
r.coef_

In [None]:
from sklearn.linear_model import LinearRegression
# Unregularised linear regression fitted on the same data, used as the
# baseline the Ridge coefficients are compared against.
lreg=LinearRegression()
lreg.fit(data,target)

In [None]:
# Coefficients of the linear-regression baseline.
lreg.coef_

In [None]:
# Intercept of the linear-regression baseline.
lreg.intercept_

In [None]:
# Overlay the two coefficient vectors so the effect of the Ridge penalty is
# visible per feature. Labels and a legend make the figure readable on its own.
plt.plot(r.coef_, label="Ridge (default settings)")
plt.plot(lreg.coef_, c='red', label="LinearRegression")
plt.xlabel("feature index")
plt.ylabel("coefficient")
plt.legend()
plt.show()

In [None]:
# Ridge fitted without an intercept term.
# NOTE(review): the name suggests centred inputs, but `data` is passed exactly
# as loaded — confirm that skipping the intercept is intended here.
r_cent=Ridge(fit_intercept=False)
r_cent.fit(data,target)

In [None]:
# Compare the intercept-free Ridge coefficients with the linear-regression
# baseline. Labels and a legend identify which line is which.
plt.plot(r_cent.coef_, label="Ridge (fit_intercept=False)")
plt.plot(lreg.coef_, c='r', label="LinearRegression")
plt.xlabel("feature index")
plt.ylabel("coefficient")
plt.legend()
plt.show()

**Range alpha**

In [None]:
# Fit one Ridge model per alpha on a log-spaced grid and keep each coefficient
# vector, in grid order, for the coefficient-path plot.
alpha_space = np.logspace(-5, 3, 20)
ridge_coef = []
for alpha in alpha_space:
    # Use a loop-local name: rebinding `r` here would silently replace the
    # baseline Ridge model that earlier cells display and plot.
    # NOTE(review): positive=True constrains every coefficient to be
    # non-negative — confirm that constraint is wanted for this sweep.
    ridge_at_alpha = Ridge(alpha=alpha, positive=True)
    ridge_at_alpha.fit(data, target)
    ridge_coef.append(ridge_at_alpha.coef_)

In [None]:
# Coefficient paths: every fitted coefficient plotted against alpha on a log
# x-axis, with axis labels and a title so the figure stands alone.
ax = plt.gca()
ax.plot(alpha_space, ridge_coef)
ax.set_xscale("log")
ax.set_xlabel("alpha")
ax.set_ylabel("coefficient")
ax.set_title("Ridge coefficients as a function of alpha")
plt.axis("tight")
plt.show()

In [None]:
# State for the cross-validation sweep: the estimator that is reused for every
# alpha, two empty accumulators (mean and std of the fold scores), and the
# log-spaced grid of candidate alphas.
ridge = Ridge(positive=True)
ridge_scores, ridge_scores_std = [], []
alpha_space = np.logspace(start=-5, stop=2, num=50)

In [None]:
from sklearn.model_selection import cross_val_score

# Start from empty accumulators: the lists are created in a separate cell, so
# without this reset re-running the loop would append a second copy of the
# sweep and the results would no longer line up with alpha_space.
ridge_scores = []
ridge_scores_std = []
for alpha in alpha_space:
    # set_params is the estimator API for changing a hyperparameter in place.
    ridge.set_params(alpha=alpha)
    # 10-fold cross-validation with the estimator's default scorer.
    ridge_cv_scores = cross_val_score(ridge, data, target, cv=10)
    ridge_scores.append(np.mean(ridge_cv_scores))
    ridge_scores_std.append(np.std(ridge_cv_scores))

In [None]:
# Mean cross-validated score for each alpha, with a +/- one standard deviation
# band. The original call plotted the std against the mean, which left alpha
# off both axes and so could not be used to choose it.
scores_mean = np.asarray(ridge_scores)
scores_std = np.asarray(ridge_scores_std)
fig, ax = plt.subplots()
ax.plot(alpha_space, scores_mean)
ax.fill_between(alpha_space, scores_mean - scores_std, scores_mean + scores_std, alpha=0.2)
ax.set_xscale("log")
ax.set_xlabel("alpha")
ax.set_ylabel("mean CV score")
ax.set_title("Ridge: cross-validated score vs alpha")
plt.show()

**scoring ridge regression with mean squared error**

In [None]:
from sklearn.metrics import make_scorer, mean_squared_error

# State for the MSE-scored sweep: a scorer wrapping mean_squared_error, the
# estimator reused for every alpha, two empty accumulators, and the alpha grid.
mse = make_scorer(mean_squared_error)
ridge = Ridge(positive=True)
ridge_scores, ridge_scores_std = [], []
alpha_space = np.logspace(start=-5, stop=2, num=50)

In [None]:
from sklearn.model_selection import cross_val_score

# Start from empty accumulators: the lists are created in a separate cell, so
# without this reset re-running the loop would append a second copy of the
# sweep and the results would no longer line up with alpha_space.
ridge_scores = []
ridge_scores_std = []
for alpha in alpha_space:
    # set_params is the estimator API for changing a hyperparameter in place.
    ridge.set_params(alpha=alpha)
    # 10-fold cross-validation scored with the `mse` scorer.
    ridge_cv_scores = cross_val_score(ridge, data, target, cv=10, scoring=mse)
    ridge_scores.append(np.mean(ridge_cv_scores))
    ridge_scores_std.append(np.std(ridge_cv_scores))

In [None]:
# Mean cross-validated MSE for each alpha, with a +/- one standard deviation
# band. The original call plotted the std against the mean, which left alpha
# off both axes and so could not be used to choose it.
scores_mean = np.asarray(ridge_scores)
scores_std = np.asarray(ridge_scores_std)
fig, ax = plt.subplots()
ax.plot(alpha_space, scores_mean)
ax.fill_between(alpha_space, scores_mean - scores_std, scores_mean + scores_std, alpha=0.2)
ax.set_xscale("log")
ax.set_xlabel("alpha")
ax.set_ylabel("mean CV mean squared error")
ax.set_title("Ridge: cross-validated MSE vs alpha")
plt.show()

**Ridge hyperparameter tuning with grid search**

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive 5-fold search over 25 log-spaced alphas for an intercept-free
# Ridge model, fitted on the full data set.
alpha_space = np.logspace(start=-5, stop=2, num=25)
param_grid = {"alpha": alpha_space}
r_reg = Ridge(fit_intercept=False)
rreg_cv = GridSearchCV(
    estimator=r_reg,
    param_grid=param_grid,
    cv=5,
)
rreg_cv.fit(data, target)

In [None]:
# Report the alpha chosen by the grid search and its best cross-validated score.
print("Tuned Ridge Ridression Parameter",rreg_cv.best_params_)
print("Ridge Regression best Score",rreg_cv.best_score_)

**Ridge hyperparameter tuning with randomized search**

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized 5-fold search over the same 25 log-spaced alphas for an
# intercept-free Ridge model.
alpha_space = np.logspace(-5, 2, 25)
param_dist = {'alpha': alpha_space}
rid_reg = Ridge(fit_intercept=False)
# random_state pins which candidates are sampled, so best_params_ and
# best_score_ are the same on every run. The seed matches the one used for the
# train/test split later in the notebook.
rid_reg_cv = RandomizedSearchCV(
    rid_reg,
    param_distributions=param_dist,
    cv=5,
    random_state=10123,
)
rid_reg_cv.fit(data, target)

In [None]:
# Alpha selected by the randomized search.
rid_reg_cv.best_params_

In [None]:
# Best cross-validated score found by the randomized search.
rid_reg_cv.best_score_

**Cross-validation with RidgeCV**

In [None]:
from sklearn.linear_model import RidgeCV

In [None]:
# Intercept-free RidgeCV with 5-fold cross-validation over the candidate alphas.
# `alpha_space` is not defined in this cell: it is whatever an earlier cell
# assigned last.
regcv=RidgeCV(alphas=alpha_space,fit_intercept=False,cv=5)
regcv.fit(data,target)


In [None]:
# Best cross-validated score reported by RidgeCV.
regcv.best_score_

In [None]:
# Alpha selected by RidgeCV.
regcv.alpha_

**Final evaluation**

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Candidate alphas for the final tuning run.
alpha_space = np.logspace(start=-5, stop=2, num=25)
param_grid = {"alpha": alpha_space}

# Hold out 20% of the rows with a fixed seed; only the remaining rows are used
# for the 5-fold grid search.
x_train, x_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=10123
)
ridge_final = Ridge(fit_intercept=False)
ridge_final_cv = GridSearchCV(
    estimator=ridge_final,
    param_grid=param_grid,
    cv=5,
)
ridge_final_cv.fit(x_train, y_train)

In [None]:
# Predictions of the tuned search object on the held-out rows.
preds=ridge_final_cv.predict(x_test)

In [None]:
# Score of the tuned search object on the held-out rows.
ridge_final_cv.score(x_test,y_test)

In [None]:
# Root mean squared error on the held-out rows. Arguments are passed in the
# documented (y_true, y_pred) order; MSE is symmetric, so the value is the same
# as with the arguments swapped.
np.sqrt(mean_squared_error(y_test, preds))