### In this notebook we implement our own Ridge Regression class and compare it with scikit-learn's `SGDRegressor` and `Ridge`.

In [8]:
# Loading Libraries
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score
import numpy as np
from sklearn.model_selection import train_test_split

In [3]:
# Load the diabetes dataset as a pair of arrays:
# X holds the independent features, y the dependent target.
X,y = load_diabetes(return_X_y = True)

In [6]:
# Feature matrix dimensions: (n_samples, n_features)
X.shape

(442, 10)

In [7]:
# Target vector dimensions: one value per sample
y.shape

(442,)

In [9]:
# Hold out 20% of the samples as a test set; random_state pins the split
# so every model below is trained and scored on the same rows.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2, random_state = 2)

### Stochastic Gradient Descent

In [10]:
from sklearn.linear_model import SGDRegressor

In [11]:
# SGD stands for Stochastic Gradient Descent.
# penalty = 'l2' with strength alpha gives a ridge-style objective, optimised
# iteratively with a constant learning rate eta0.
# random_state is fixed because SGD is stochastic: without it the fitted
# weights and the R2 score change on every run and cannot be reproduced.
reg = SGDRegressor(penalty = 'l2',max_iter = 500, eta0 = 0.1,learning_rate = 'constant', alpha = 0.001, random_state = 2)

In [12]:
# Train the SGD model on the training split
reg.fit(X_train,y_train)

SGDRegressor(alpha=0.001, eta0=0.1, learning_rate='constant', max_iter=500)

In [13]:
# Predict targets for the held-out test split
y_pred = reg.predict(X_test)

In [15]:
# Compare the test-set predictions with the true targets using R2
print("R2 score is", r2_score(y_test,y_pred))

R2 score is 0.43628507522204796


In [17]:
# Betas: the learned weight for each feature
print(reg.coef_)
# c: the learned intercept term
print(reg.intercept_)

[  29.14896732 -115.06415611  394.9561455   263.8726506   -11.1710783
  -67.98587546 -169.23662897  106.43352595  359.05260203  102.64981295]
[161.31884936]


### Ridge Regressor

In [33]:
from sklearn.linear_model import Ridge

# Closed-form Ridge with the same alpha as the SGD model above.
# NOTE(review): `reg` is rebound here, so the SGD model is no longer reachable
# after this cell. max_iter is presumably irrelevant for the direct
# "cholesky" solver - confirm against the Ridge documentation.
reg = Ridge(alpha = 0.001, max_iter = 500, solver="cholesky")

In [34]:
# Train the Ridge model on the training split
reg.fit(X_train,y_train)

Ridge(alpha=0.001, max_iter=500, solver='cholesky')

In [35]:
# Predict targets for the held-out test split (overwrites the SGD predictions)
y_pred = reg.predict(X_test)

In [36]:
# Compare the Ridge test-set predictions with the true targets using R2
print("R2 score is", r2_score(y_test,y_pred))

R2 score is 0.44086641800390625


In [37]:
# Betas: the learned weight for each feature
print(reg.coef_)
# c: the learned intercept term
print(reg.intercept_)

[  -8.76358326 -204.32112488  518.3717286   339.97538458 -787.69076637
  475.27471785  106.78654     114.63206266  819.73954207   52.87209972]
151.88537325625248


### Using own class

In [38]:

class MeraRidge:
    """Ridge regression fitted with the closed-form normal equation.

    Solves ``(X^T X + alpha * I) w = X^T y`` on a design matrix that has a
    leading column of ones, so ``w[0]`` is the intercept and ``w[1:]`` are the
    feature coefficients. The intercept entry of ``I`` is zeroed, so the
    intercept itself is not penalised.

    Parameters
    ----------
    alpha : float, default=0.1
        Strength of the L2 penalty on the feature coefficients.

    Attributes
    ----------
    coef_ : ndarray or None
        Fitted feature coefficients; ``None`` until ``fit`` is called.
    intercept_ : float or None
        Fitted intercept; ``None`` until ``fit`` is called.
    """

    def __init__(self,alpha = 0.1):
        self.alpha = alpha
        self.coef_ = None
        self.intercept_ = None

    def fit(self,X_train,y_train):
        """Estimate the intercept and coefficients from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)

        Returns
        -------
        self : MeraRidge
            The fitted estimator, so calls can be chained.
        """
        # Prepend a column of ones so the first weight acts as the intercept.
        design = np.insert(X_train,0,1,axis = 1)

        # Penalty matrix is square with one row/column per design column
        # (n_features + 1); the [0, 0] entry is zeroed so the intercept is
        # left unregularised.
        penalty = self.alpha * np.identity(design.shape[1])
        penalty[0, 0] = 0.0

        # w = (X^T X + alpha * I)^-1 X^T y, obtained by solving the linear
        # system directly rather than forming an explicit inverse, which is
        # cheaper and numerically more stable.
        gram = np.dot(design.T, design) + penalty
        moment = np.dot(design.T, y_train)
        weights = np.linalg.solve(gram, moment)

        self.intercept_ = weights[0]
        self.coef_ = weights[1:]
        return self

    def pred(self,X_test):
        """Return predictions ``X_test @ coef_ + intercept_`` (y = mx + c)."""
        return np.dot(X_test,self.coef_) + self.intercept_

    def predict(self,X_test):
        """Alias of ``pred`` matching the scikit-learn estimator method name."""
        return self.pred(X_test)
        


In [26]:
# Scratch check: prepend a column of ones at index 0 of every row,
# which is how MeraRidge.fit builds its design matrix.
np.insert(X_train,0,1,axis = 1)

array([[ 1.        , -0.00188202, -0.04464164, ..., -0.03949338,
        -0.06291295,  0.04034337],
       [ 1.        , -0.00914709, -0.04464164, ..., -0.03949338,
         0.01703713, -0.0052198 ],
       [ 1.        ,  0.02354575,  0.05068012, ..., -0.03949338,
        -0.09643322, -0.01764613],
       ...,
       [ 1.        ,  0.06350368,  0.05068012, ..., -0.00259226,
         0.08449528, -0.01764613],
       [ 1.        , -0.05273755,  0.05068012, ...,  0.1081111 ,
         0.03605579, -0.04249877],
       [ 1.        ,  0.00175052,  0.05068012, ...,  0.1081111 ,
         0.06898221,  0.12732762]])

In [27]:
# Scratch check: identity matrix sized by the number of columns of X_train.
# X_train has no ones column here, so this is 10x10; inside MeraRidge.fit the
# ones column is inserted first, so the matrix there has one more row/column.
I = np.identity(X_train.shape[1])
I

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [39]:
# Fit the custom model and report R2, coefficients and intercept on the test split.
# NOTE: MeraRidge() uses its default alpha = 0.1, not the alpha = 0.001 passed
# to sklearn's Ridge above, so the two sets of coefficients are not like-for-like.
reg = MeraRidge()
reg.fit(X_train,y_train)
y_pred = reg.pred(X_test)
print(r2_score(y_test,y_pred))
print(reg.coef_)
print(reg.intercept_)

0.4519973816947851
[   6.64275308 -172.24216584  485.52387163  314.68212174  -72.93932312
  -80.59005344 -174.46651489   83.61665316  484.36328537   73.58415414]
151.92548050708132


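### Conclusion
**We computed the Ridge coefficients and intercept in two ways: with scikit-learn and with our own class.**

* The intercepts and R2 scores are close. The coefficients differ noticeably because `MeraRidge()` was fit with its default `alpha = 0.1`, while sklearn's `Ridge` used `alpha = 0.001`; use the same `alpha` in both for a like-for-like comparison.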
