In [1]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error , mean_squared_error , r2_score
from sklearn.linear_model import Ridge , SGDRegressor
from sklearn.datasets import load_diabetes

In [2]:
data = load_diabetes()

In [3]:
data

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [4]:
X = data['data']
y = data['target']
X.shape , y.shape

((442, 10), (442,))

In [5]:
X_train , X_test , y_train , y_test = train_test_split(X , y , test_size=0.2 , random_state=42)
X_train.shape , X_test.shape

((353, 10), (89, 10))

In [17]:
# Using Gradient Descent in Ridge class only
rr = Ridge(alpha=0.001 , max_iter=500 , solver='sparse_cg') # Total 6 solver is there , 3 for closed form solution , 3 for gradient descent
rr.fit(X_train , y_train)    # here max_iter is epoch 

0,1,2
,alpha,0.001
,fit_intercept,True
,copy_X,True
,max_iter,500
,tol,0.0001
,solver,'sparse_cg'
,positive,False
,random_state,


In [18]:
y_pred = rr.predict(X_test)
print("MAE" , mean_absolute_error(y_test , y_pred))
print("R2_Score" , r2_score(y_test , y_pred))

MAE 42.803081798071354
R2_Score 0.4534280296653138


In [19]:
rr.coef_ , rr.intercept_

(array([  38.48350782, -241.35179305,  543.83518292,  346.78283412,
        -827.70198998,  437.17375331,  116.94974928,  260.75926999,
         696.12922851,   49.74345128]),
 np.float64(151.3419805391815))

In [20]:
# Now lets use SGDRegression class for the same problem , we can solve for ridge and lasso or simple lr
sgd = SGDRegressor(alpha=0.001 , max_iter=500 , penalty='l2' , eta0=0.1 , learning_rate='constant')

# max_iter = epoch , alpha = lambda , penalty=l2 is saying Ridge Regression , learning_rate=constant 
# means keep learning rate constant  , eta0 means initail learning rate
sgd.fit(X_train, y_train)

0,1,2
,loss,'squared_error'
,penalty,'l2'
,alpha,0.001
,l1_ratio,0.15
,fit_intercept,True
,max_iter,500
,tol,0.001
,shuffle,True
,verbose,0
,epsilon,0.1


In [21]:
y_pred2 = sgd.predict(X_test)
print("MAE" , mean_absolute_error(y_test , y_pred2))
print("R2_Score" , r2_score(y_test , y_pred2))


MAE 43.760445775085586
R2_Score 0.45110264217521534


In [22]:
sgd.coef_  , sgd.intercept_

(array([  45.07222882, -140.22633201,  401.76147954,  266.79709864,
         -35.48805184,  -75.61473353, -183.02883719,  133.28070293,
         308.55389047,  117.67061575]),
 array([148.69093912]))

In [62]:
# Now Lets Build Our Own Class to Do all these things 
# grad = X.T@X@W  - X.T@Y + lampda * W

class MeraRidgeGD():
    def __init__(self , learning_rate , epochs , alpha):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.alpha = alpha
        self.coef_ = None
        self.intercept_ = None 
    def fit(self ,X_train , y_train):
        # Inialize coeficent value with 1 and intercept value 0
        self.coef_ = np.ones(X_train.shape[1])
        self.intercept_ = 0
        W = np.insert(self.coef_ , 0 , self.intercept_)
        X_train = np.insert(X_train , 0 , 1 , axis =1)
        
        
        for i in range(self.epochs):
            #grad = np.dot(X_train.T,X_train).dot(W) - np.dot(X_train.T ,y_train) + self.alpha*W 
            grad = (X_train.T)@X_train@(W) - X_train.T@y_train + self.alpha*W 
            W = W - self.learning_rate*grad
            
        self.intercept_ = W[0]
        self.coef_ = W[1:]
        
        return self.coef_ , self.intercept_ 
    def predict(self , X_test):
        y_pred = np.dot(X_test ,self.coef_) + self.intercept_
        return y_pred 
    

In [63]:
Rgd = MeraRidgeGD(learning_rate=0.005 , epochs=500 , alpha=0.001)

In [64]:
Rgd.fit(X_train , y_train)

(array([  43.37183861, -192.03766574,  496.43542567,  319.37407   ,
         -64.42788084, -113.194338  , -213.9073644 ,  144.86136322,
         367.67948022,  119.56857869]),
 np.float64(151.4045475591972))

In [65]:
y_pred3 = Rgd.predict(X_test)
print("MAE" , mean_absolute_error(y_test , y_pred3))
print("R2_Score" , r2_score(y_test , y_pred3))



MAE 43.02512214789863
R2_Score 0.4592249376936409
