### Stochastic Gradient Descent Implementation

In [3]:
from sklearn.datasets import load_diabetes
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [None]:
class SGDRegressor:
    """
    A simple implementation of Stochastic Gradient Descent for linear regression.

    The linear model ``y = X @ coef_ + intercept_`` is fitted by minimising the
    squared error, updating the parameters from one randomly drawn training
    sample at a time (samples are drawn with replacement).

    Parameters:
    learning_rate : float, default=0.01
        Step size applied to every single-sample gradient update.
    n_iterations : int, default=1000
        Number of epochs; each epoch performs ``n_samples`` updates.
    random_state : int or None, default=None
        Seed for the sample selection. With ``None`` the indices are drawn
        from NumPy's global random state (``np.random``), so results are only
        reproducible if that global state has been seeded by the caller.

    Attributes:
    coef_ : ndarray of shape (n_features,), or None before ``fit``
        Learned feature weights.
    intercept_ : float, or None before ``fit``
        Learned bias term.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000, random_state=None):
        self.coef_ = None
        self.intercept_ = None
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations        # epochs
        self.random_state = random_state

    def fit(self, X, y):
        """
        Fit the model to the training data using stochastic gradient descent.
        Parameters:
        X : array-like, shape (n_samples, n_features)
            Training data.
        y : array-like, shape (n_samples,)
            Target values.
        Raises:
        ValueError
            If X is not 2-dimensional or y does not hold one value per row of X.
        """
        # Convert up front so lists work and so pandas objects are indexed by
        # position rather than by label.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
            )

        self.coef_ = np.ones(n_features)
        self.intercept_ = 0.0

        # A private generator when a seed is given; otherwise keep drawing from
        # the global NumPy state exactly as an unseeded run would.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        for _ in range(self.n_iterations):
            for _ in range(n_samples):
                # Pick one training sample at random (with replacement).
                idx = rng.randint(0, n_samples)
                residual = y[idx] - (np.dot(X[idx], self.coef_) + self.intercept_)

                # Gradients of the squared error (y - y_pred)^2 for this sample.
                intercept_der = -2 * residual
                coef_der = -2 * (X[idx] * residual)

                # Update coefficients and intercept
                self.coef_ -= self.learning_rate * coef_der
                self.intercept_ -= self.learning_rate * intercept_der

        print(f"Final coefficients: {self.coef_}, Final intercept: {self.intercept_}")

    def predict(self, X):
        """
        Predict using the linear model.
        Parameters:
        X : array-like, shape (n_samples, n_features)
            Samples to predict.
        Returns:
        y_pred : array-like, shape (n_samples,)
            Predicted values.
        """
        return np.dot(np.asarray(X, dtype=float), self.coef_) + self.intercept_



In [11]:
# Load the diabetes dataset as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

# Hold out 20% of the rows for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Custom model: step size 0.01, 50 passes (epochs) over the training set.
sgd_regressor = SGDRegressor(
    learning_rate=0.01,
    n_iterations=50,
)

In [13]:
# Seed NumPy's global generator so the randomly drawn sample order, and hence
# the fitted coefficients, are the same on every run of this cell.
np.random.seed(42)
sgd_regressor.fit(X_train, y_train)

Final coefficients: [  59.03857827 -102.31128273  364.46680422  246.8899181    -1.45116488
  -37.32335417 -186.40073488  153.95699727  283.87864777  138.51772895], Final intercept: 149.6077800754558


In [14]:
# Score the custom model on the held-out test split.
y_pred = sgd_regressor.predict(X_test)
custom_r2 = r2_score(y_test, y_pred)
print(f"R^2 score: {custom_r2}")

R^2 score: 0.44578787705206313


### Comparison between the custom class and scikit-learn’s built-in models

In [15]:
# Ordinary least squares baseline on the same train/test split.
# LinearRegression is already imported in the notebook's import cell.
sk_model = LinearRegression()
sk_model.fit(X_train, y_train)
sk_y_pred = sk_model.predict(X_test)
print(f"Sklearn R^2 score: {r2_score(y_test, sk_y_pred)}")
print(f"Sklearn coefficients: {sk_model.coef_}, Sklearn intercept: {sk_model.intercept_}")

Sklearn R^2 score: 0.45260276297191926
Sklearn coefficients: [  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743], Sklearn intercept: 151.34560453985995


In [18]:
# Import under an alias: a plain `import SGDRegressor` would shadow the custom
# SGDRegressor class defined in this notebook, so re-running earlier cells
# would silently build the scikit-learn model instead.
from sklearn.linear_model import SGDRegressor as SklearnSGDRegressor

# random_state fixes the shuffling so the score is reproducible.
# NOTE(review): scikit-learn's defaults (regularisation penalty, early stopping
# via tol) are left untouched here, so this is not an exact like-for-like
# match with the custom class — confirm whether that is intended.
sgd_sklearn = SklearnSGDRegressor(
    learning_rate='constant', eta0=0.01, max_iter=100, random_state=42
)
sgd_sklearn.fit(X_train, y_train)
sgd_sklearn_y_pred = sgd_sklearn.predict(X_test)
print(f"Sklearn SGD R^2 score: {r2_score(y_test, sgd_sklearn_y_pred)}")


Sklearn SGD R^2 score: 0.44647465175599643
