# Create Dataset

In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt

from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
from sklearn import set_config
set_config(display='diagram')

In [3]:
X, y = load_diabetes(return_X_y=True)

In [4]:
print(f"Size of X :{X.shape} \nSize of y {y.shape}")

Size of X :(442, 10) 
Size of y (442,)


# Train Test Split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [6]:

print(f"Size of X_train :{X_train.shape} \nSize of y_train {y_train.shape} \n")
print(f"Size of X_test :{X_test.shape} \nSize of y_test {y_test.shape} \n")

Size of X_train :(353, 10) 
Size of y_train (353,) 

Size of X_test :(89, 10) 
Size of y_test (89,) 



# Apply Sklearn Model

In [7]:
reg = LinearRegression()
reg.fit(X_train, y_train)

In [8]:
reg.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [9]:
reg.intercept_

np.float64(151.88331005254167)

In [10]:
y_pred = reg.predict(X_test)
r2_score(y_test, y_pred)

0.4399338661568968

# Let's  Create our own Class

In [22]:
import numpy as np

class StochasticGDRegressor:
    
    def __init__(self, learning_rate=0.01, epochs=100):
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
        
    def fit(self, X, y):
        # Initialize the coefficients and intercept
        self.intercept_ = 0
        self.coef_ = np.ones(X.shape[1])
        
        # Gradient Descent Loop
        for epoch in range(self.epochs):
            for row_index in range(X.shape[0]):
                # This randomly selects one row and fetches its values
                idx = np.random.randint(0, X.shape[0])
                
                # Make predictions for the randomly selected sample
                y_hat = np.dot(X[idx], self.coef_) + self.intercept_
            
                # Calculate gradients
                intercept_derivative = -2 * (y[idx] - y_hat)
                self.intercept_ = self.intercept_ - (self.lr * intercept_derivative)
                
                # Update coefficients
                coefficient_derivative = -2 * (y[idx] - y_hat) * X[idx]
                self.coef_ = self.coef_ - (self.lr * coefficient_derivative)
                
                # for debugging
                # if epoch % 10 == 0:  # Print every 10 epochs
                #     print(f"Epoch {epoch}: Coefficients = {self.coef_}, Intercept = {self.intercept_}")
                
    def predict(self, X):
        # Make predictions using the learned parameters
        return np.dot(X, self.coef_) + self.intercept_


In [23]:
sgd = StochasticGDRegressor(learning_rate=0.02, epochs=100)

In [24]:
sgd.fit(X_train, y_train)

In [25]:
y_pred_own = sgd.predict(X_test)

In [26]:
r2_score(y_test, y_pred_own)

0.44444221713885834

# Verify our class with sklean 

In [27]:
from sklearn.linear_model import SGDRegressor

In [28]:
sgdr = SGDRegressor(max_iter=100, learning_rate='constant', eta0=0.01)

In [29]:
sgdr.fit(X_train, y_train)



In [30]:
y_pred_sgdr = sgdr.predict(X_test)

In [32]:
r2_score(y_test, y_pred_sgdr)

0.4329033020575098

In [None]:
# Now compare with the model 
# nearly all same

print(f"Linear Regression r^2 Score :{r2_score(y_test, y_pred)} \n")
print(f"Our own Regression r^2 Score :{r2_score(y_test, y_pred_own)} \n")
print(f"SGDRegressor r^2 Score :{r2_score(y_test, y_pred)} \n")

Linear Regression r^2 Score :0.4399338661568968 

Our own Regression r^2 Score :0.44444221713885834 

SGDRegressor r^2 Score :0.4399338661568968 

