<a href="https://colab.research.google.com/github/Sneha123-zudo/Machine-Learning---Learning/blob/main/Gradient_Descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Gradient Descent**

In [1]:
# Simple Gradient Descent Example
# Goal: learn y = mx + b

# Data (x, y)
x = [1, 2, 3]
y = [2, 4, 6]

# Step 1: start with random m and b
m = 0   # slope
b = 0   # intercept

# Learning rate (step size)
lr = 0.1

# Number of times to learn
epochs = 100

for i in range(epochs):

    # Step 2: Make predictions
    y_pred = [m*xi + b for xi in x]

    # Step 3: Calculate error (mean squared error)
    error = 0
    for yi, ypi in zip(y, y_pred):
        error += (ypi - yi)**2
    error = error / len(x)

    # Step 4: Calculate gradients
    dm = 0
    db = 0
    for xi, yi, ypi in zip(x, y, y_pred):
        dm += 2 * xi * (ypi - yi)
        db += 2 * (ypi - yi)

    dm /= len(x)
    db /= len(x)

    # Step 5: Update m and b (move downhill)
    m = m - lr * dm
    b = b - lr * db

    # Print progress
    print("step:", i, "m:", round(m, 3), "b:", round(b, 3), "error:", round(error, 3))

print("\nFinal equation: y =", round(m, 2), "x +", round(b, 2))


step: 0 m: 1.867 b: 0.8 error: 18.667
step: 1 m: 1.671 b: 0.693 error: 0.296
step: 2 m: 1.701 b: 0.686 error: 0.073
step: 3 m: 1.706 b: 0.669 error: 0.067
step: 4 m: 1.713 b: 0.653 error: 0.064
step: 5 m: 1.72 b: 0.637 error: 0.061
step: 6 m: 1.727 b: 0.622 error: 0.058
step: 7 m: 1.733 b: 0.607 error: 0.055
step: 8 m: 1.74 b: 0.592 error: 0.053
step: 9 m: 1.746 b: 0.578 error: 0.05
step: 10 m: 1.752 b: 0.564 error: 0.048
step: 11 m: 1.758 b: 0.55 error: 0.046
step: 12 m: 1.764 b: 0.537 error: 0.043
step: 13 m: 1.769 b: 0.524 error: 0.041
step: 14 m: 1.775 b: 0.512 error: 0.039
step: 15 m: 1.78 b: 0.499 error: 0.038
step: 16 m: 1.786 b: 0.487 error: 0.036
step: 17 m: 1.791 b: 0.476 error: 0.034
step: 18 m: 1.796 b: 0.464 error: 0.032
step: 19 m: 1.801 b: 0.453 error: 0.031
step: 20 m: 1.805 b: 0.442 error: 0.029
step: 21 m: 1.81 b: 0.432 error: 0.028
step: 22 m: 1.815 b: 0.421 error: 0.027
step: 23 m: 1.819 b: 0.411 error: 0.025
step: 24 m: 1.824 b: 0.401 error: 0.024
step: 25 m: 1.828

**Types of Gradient Descent**

**1. Batch Gradient Descent**

In [2]:
from sklearn.datasets import load_diabetes
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import time

In [3]:
X,y = load_diabetes(return_X_y = True)

In [4]:
X.shape , y.shape

((442, 10), (442,))

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=2)

In [6]:
reg = LinearRegression()
reg.fit(X_train, y_train)

In [7]:
reg.coef_ , reg.intercept_

(array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
        -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
         861.12700152,   52.42112238]),
 np.float64(151.88331005254167))

In [8]:
y_pred = reg.predict(X_test)

In [9]:
print("R2 Score:", r2_score(y_test, y_pred))

R2 Score: 0.4399338661568968


In [10]:
class GDRegressor:

    def __init__(self,learning_rate=0.01,epochs=100):

        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs

    def fit(self,X_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])

        for i in range(self.epochs):
            # update all the coef and the intercept
            y_hat = np.dot(X_train,self.coef_) + self.intercept_
            #print("Shape of y_hat",y_hat.shape)
            intercept_der = -2 * np.mean(y_train - y_hat)
            self.intercept_ = self.intercept_ - (self.lr * intercept_der)

            coef_der = -2 * np.dot((y_train - y_hat),X_train)/X_train.shape[0]
            self.coef_ = self.coef_ - (self.lr * coef_der)

        print(self.intercept_,self.coef_)

    def predict(self,X_test):
        return np.dot(X_test,self.coef_) + self.intercept_

In [11]:
gdr = GDRegressor(epochs=100,learning_rate=0.5)

In [12]:
start = time.time()
gdr.fit(X_train,y_train)
print("Time taken", time.time()-start)

151.53487825611782 [ 50.75844879  -0.84984779 165.27498779 124.74079278  41.05627844
  25.46870233 -96.49244319  94.18418663 158.14584022  87.51493943]
Time taken 0.028650999069213867


In [13]:
y_pred = gdr.predict(X_test)

In [14]:
r2_score(y_test,y_pred)

0.3201524330793638

**2. Stochastic Gradient Descent** *italicised text*

In [15]:
from sklearn.datasets import load_diabetes
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [16]:
X,y = load_diabetes(return_X_y = True)

In [17]:
X.shape , y.shape

((442, 10), (442,))

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=2)

In [19]:
reg = LinearRegression()
reg.fit(X_train, y_train)

In [20]:
reg.coef_ , reg.intercept_

(array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
        -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
         861.12700152,   52.42112238]),
 np.float64(151.88331005254167))

In [21]:
y_pred = reg.predict(X_test)

In [22]:
print("R2 Score:", r2_score(y_test, y_pred))

R2 Score: 0.4399338661568968


In [23]:
class SGDRegressor:

    def __init__(self,learning_rate=0.01,epochs=100):

        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs

    def fit(self,X_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])

        for i in range(self.epochs):
          for j in range(X_train.shape[0]):
            idx = np.random.randint(0,X_train.shape[0])
            y_hat = np.dot(X_train[idx],self.coef_) + self.intercept_
            intercept_der = -2 * np.mean(y_train[idx] - y_hat)
            self.intercept_ = self.intercept_ - (self.lr * intercept_der)
            coef_der = -2 * np.dot((y_train[idx] - y_hat),X_train[idx])
            self.coef_ = self.coef_ - (self.lr * coef_der)



        print(self.intercept_,self.coef_)

    def predict(self,X_test):
        return np.dot(X_test,self.coef_) + self.intercept_

In [24]:
sgd = SGDRegressor(epochs=50, learning_rate=0.01)

In [25]:
start = time.time()
sgd.fit(X_train, y_train)
print("Time taken: ", time.time()-start)

141.94538180862736 [  50.15733609  -66.77115126  362.07295285  256.33416294   18.51320769
  -23.84311504 -175.83499334  130.86229673  316.22199205  125.72016605]
Time taken:  1.2452757358551025


In [26]:
y_pred = sgd.predict(X_test)

In [27]:
print("R2 score: ", r2_score(y_test, y_pred))

R2 score:  0.4162639445622306


**3. Mini - Batch Gradient Descent**

In [28]:
import random
class MBGDRegressor:

    def __init__(self,batch_size,learning_rate=0.01,epochs=100):

        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size

    def fit(self,X_train,y_train):
        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])

        for i in range(self.epochs):
          for j in range(int(X_train.shape[0]/self.batch_size)):
            idx = random.sample(range(X_train.shape[0]), self.batch_size)
            y_hat = np.dot(X_train[idx],self.coef_) + self.intercept_
            intercept_der = -2 * np.mean(y_train[idx] - y_hat)
            self.intercept_ = self.intercept_ - (self.lr * intercept_der)
            coef_der = -2 * np.dot((y_train[idx] - y_hat),X_train[idx])
            self.coef_ = self.coef_ - (self.lr * coef_der)



        print(self.intercept_,self.coef_)

    def predict(self,X_test):
        return np.dot(X_test,self.coef_) + self.intercept_

In [29]:
mbr = MBGDRegressor(batch_size=int(X_train.shape[0]/10), learning_rate=0.01, epochs=50)

In [30]:
mbr.fit(X_train, y_train)

152.03765949166666 [  64.22797465  -67.24980851  355.27959217  252.71912673   11.57961427
  -34.7884939  -174.92208641  129.4293085   319.59244089  132.820308  ]


In [31]:
y_pred = mbr.predict(X_test)

In [32]:
print("R2 Score:", r2_score(y_test, y_pred))

R2 Score: 0.4343437865030888
