In [1]:
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score
import numpy as np

In [2]:
x,y = load_diabetes(return_X_y = True) # y default loads scaled data

In [3]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.2,random_state=4)

In [52]:
# using Gradient descent to fit the ridge model 
from sklearn.linear_model import SGDRegressor

sgd = SGDRegressor(eta0 = 0.0266, learning_rate = 'constant', penalty = 'l2', max_iter = 100, alpha = 0.001) #eta0 is the learning_rate value
sgd.fit(xtrain, ytrain)
pred = sgd.predict(xtest)
print(r2_score(ytest, pred))
print(sgd.coef_, sgd.intercept_)

0.45084673378726603
[  51.5573825  -139.35700348  353.5284863   259.73716396    1.94494252
  -49.4915564  -166.58516249  138.21453748  317.60987748  106.05543454] [161.48041569]


In [5]:
# we can also use the Ridge class to use GD to fit the model
from sklearn.linear_model import Ridge

r = Ridge(alpha = 0.001, solver = 'saga') # solver saga uses stochastic GD
r.fit(xtrain, ytrain)
pred2 = r.predict(xtest)
print(r2_score(ytest, pred2))
print(r.coef_, r.intercept_)

0.4625175538679491
[  34.53481516 -290.8297132   482.42486674  368.06264241 -850.95867099
  500.43286501  179.42011869  270.53452807  759.17644526   37.48099272] 151.10138570715063


In [24]:
# our own class implementing GD to solve Ridge Regression

class MeraRidge :
    
    def __init__(self, alpha = 0.001, learning_rate = 0.001, epochs = 100):
        self.alpha = alpha
        self.intercept_ = None
        self.coef_ = None
        self.epochs = epochs
        self.learning_rate = learning_rate
        
    def fit(self, xtrain, ytrain):
        # here in GD , we have to initially take some coeff values so we'll create weights vector with forst value for intercept and rest for other coef
        self.coef_ = np.ones(xtrain.shape[1])
        self.intercept_ = 0
        weights = np.insert(self.coef_, 0, self.intercept_) # set 1st col of coef to be the intercept term for the model
        
        xtrain = np.insert(xtrain, 0, 1, axis = 1) # making our xtrain column as legible for being multiplied by weights
        
        for i in range(self.epochs):
            derivative = np.dot(xtrain.T, xtrain).dot(weights) - np.dot(ytrain.T, xtrain) + self.alpha*weights
            weights = weights - self.learning_rate*derivative
        
        self.coef_ = weights[1:]
        self.intercept_ = weights[0]
        
        
    def predict(self, xtest):
        #print((np.dot(xtest, self.coef_.reshape(-1,1)) + self.intercept_).shape)
        return np.dot(xtest, self.coef_) + self.intercept_

In [69]:
# we noted that increasing lr value is causing certain errors related to overflow in our class implementation
# also note that we ae applying little bit of regulaization only
reg = MeraRidge(alpha = 0.1,epochs = 1000, learning_rate = 0.0026)
reg.fit(xtrain, ytrain)
preds = reg.predict(xtest)
print(r2_score(ytest,preds))
reg.intercept_, reg.coef_

0.47007144598130957


(150.89079192829706,
 array([  47.45988234, -201.25276553,  426.38647344,  308.29371495,
         -21.75009137,  -84.56646679, -183.71317602,  143.73095353,
         379.48066772,   95.75667701]))