In [3]:
import numpy as np
import random


In [4]:

# Generate synthetic data
# Samples follow y = 5 * x + 4.5 plus Gaussian noise with standard deviation 0.1.
n_samples = 1000  # single source of truth for the dataset size

# Seed both NumPy's generator and the standard-library one so that code
# drawing from either is repeatable after a kernel restart.
np.random.seed(42)
random.seed(42)

X = np.random.rand(n_samples, 1)
y_true = 5 * X + 4.5 + 0.1 * np.random.randn(n_samples, 1)

# Add a bias term to X (a leading column of ones)
X_b = np.c_[np.ones((n_samples, 1)), X]


In [5]:

# Initializations
# Hyperparameters passed to the gradient-descent runs in this notebook.
learning_rate = 0.01  # step size applied to every gradient update
n_epochs = 1000       # upper bound on the number of passes over the data
batch_size = 10       # rows per update for mini-batch gradient descent
m = X.shape[0]        # number of training samples


In [6]:


# Function to calculate mean squared error
def mse(y_true, y_pred):
    """Return the mean of the squared element-wise differences.

    Parameters:
    - y_true: Observed target values
    - y_pred: Predicted values, broadcast-compatible with y_true

    Returns:
    - The average of (y_true - y_pred) squared over all elements
    """
    residual = y_true - y_pred
    return np.mean(np.square(residual))


In [7]:

# Batch Gradient Descent (BGD)
def batch_gradient_descent(X_b, y_true, learning_rate, n_epochs, tol=1e-4, verbose=True):
    """
    Fit linear-regression weights with full-batch gradient descent on the MSE.

    Parameters:
    - X_b: Design matrix of shape (n_samples, n_features), bias column included
    - y_true: Targets of shape (n_samples, 1), matching the column-vector predictions
    - learning_rate: Step size applied to each gradient update
    - n_epochs: Maximum number of updates (one update per epoch)
    - tol: Stop once the loss changes by less than this between consecutive epochs
    - verbose: When True (default), print the loss after every epoch

    Returns:
    - (w, epochs_run): flattened weight vector and the number of epochs executed
      (0 when n_epochs <= 0, in which case w is the random initialisation)
    """
    # Size everything from the data that was passed in, not from notebook globals,
    # so the function stays correct when called on a different dataset.
    n_samples, n_features = X_b.shape
    w = np.random.randn(n_features, 1)
    prev_loss = float('inf')
    epochs_run = 0

    for epoch in range(n_epochs):
        gradients = -2 / n_samples * X_b.T.dot(y_true - X_b.dot(w))
        w -= learning_rate * gradients
        loss = mse(y_true, X_b.dot(w))
        epochs_run = epoch + 1
        if verbose:
            print(f"Epoch {epoch}, Loss: {loss}")

        # Check convergence based on change in loss
        if abs(prev_loss - loss) < tol:
            break

        prev_loss = loss

    return w.flatten(), epochs_run  # Return optimal weights and number of epochs


In [8]:

# Stochastic Gradient Descent (SGD)
def stochastic_gradient_descent(X_b, y_true, learning_rate, n_epochs, tol=1e-4, verbose=True):
    """
    Fit linear-regression weights with single-sample stochastic gradient descent.

    Each epoch performs n_samples updates, each on one row drawn uniformly at
    random with replacement.

    Parameters:
    - X_b: Design matrix of shape (n_samples, n_features), bias column included
    - y_true: Targets of shape (n_samples, 1)
    - learning_rate: Step size applied to each gradient update
    - n_epochs: Maximum number of epochs
    - tol: Stop once the full-dataset loss changes by less than this between epochs
    - verbose: When True (default), print the loss after every epoch

    Returns:
    - (w, epochs_run): flattened weight vector and the number of epochs executed
      (0 when n_epochs <= 0, in which case w is the random initialisation)
    """
    # Size everything from the data that was passed in, not from notebook globals.
    n_samples, n_features = X_b.shape
    w = np.random.randn(n_features, 1)
    prev_loss = float('inf')
    epochs_run = 0

    for epoch in range(n_epochs):
        for _ in range(n_samples):
            # Draw from NumPy's generator so np.random.seed() governs both the
            # initial weights and the sampling order.
            random_index = np.random.randint(n_samples)
            xi = X_b[random_index:random_index + 1]
            yi = y_true[random_index:random_index + 1]
            gradients = -2 * xi.T.dot(yi - xi.dot(w))
            w -= learning_rate * gradients

        # Only the end-of-epoch loss is used, so evaluate it once per epoch
        # instead of after every single-sample step.
        loss = mse(y_true, X_b.dot(w))
        epochs_run = epoch + 1
        if verbose:
            print(f"Epoch {epoch}, Loss: {loss}")

        # Check convergence based on change in loss
        if abs(prev_loss - loss) < tol:
            break

        prev_loss = loss

    return w.flatten(), epochs_run


In [9]:

# Mini-Batch Gradient Descent (MBGD)
def mini_batch_gradient_descent(X_b, y_true, learning_rate, n_epochs, batch_size, tol=1e-4, verbose=True):
    """
    Fit linear-regression weights with mini-batch gradient descent on the MSE.

    Batches are consecutive, unshuffled row slices of length batch_size; the
    last batch of an epoch is shorter when n_samples is not a multiple of
    batch_size.

    Parameters:
    - X_b: Design matrix of shape (n_samples, n_features), bias column included
    - y_true: Targets of shape (n_samples, 1)
    - learning_rate: Step size applied to each gradient update
    - n_epochs: Maximum number of epochs
    - batch_size: Number of rows per update; must be at least 1
    - tol: Stop once the full-dataset loss changes by less than this between epochs
    - verbose: When True (default), print the loss after every epoch

    Returns:
    - (w, epochs_run): flattened weight vector and the number of epochs executed
      (0 when n_epochs <= 0, in which case w is the random initialisation)

    Raises:
    - ValueError: if batch_size is smaller than 1
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Size everything from the data that was passed in, not from notebook globals.
    n_samples, n_features = X_b.shape
    w = np.random.randn(n_features, 1)
    prev_loss = float('inf')
    epochs_run = 0

    for epoch in range(n_epochs):
        for start in range(0, n_samples, batch_size):
            xi = X_b[start:start + batch_size]
            yi = y_true[start:start + batch_size]
            # Average over the rows actually in this slice: the final batch can
            # be shorter than batch_size and must not be under-weighted.
            gradients = -2 / len(xi) * xi.T.dot(yi - xi.dot(w))
            w -= learning_rate * gradients

        # Only the end-of-epoch loss is used, so evaluate it once per epoch.
        loss = mse(y_true, X_b.dot(w))
        epochs_run = epoch + 1
        if verbose:
            print(f"Epoch {epoch}, Loss: {loss}")

        # Check convergence based on change in loss
        if abs(prev_loss - loss) < tol:
            break

        prev_loss = loss

    return w.flatten(), epochs_run


In [12]:

# Run Batch Gradient Descent
w_bgd, epochs_bgd = batch_gradient_descent(X_b, y_true, learning_rate, n_epochs)

# Run Stochastic Gradient Descent
w_sgd, epochs_sgd = stochastic_gradient_descent(X_b, y_true, learning_rate, n_epochs)

# Run Mini-Batch Gradient Descent
w_mbgd, epochs_mbgd = mini_batch_gradient_descent(X_b, y_true, learning_rate, n_epochs, batch_size)

# Report the fitted weights and the number of epochs each variant ran.
print("Optimal Weights (batch_gradient_descent):", w_bgd)
print("Number of Epochs (batch_gradient_descent):", epochs_bgd)

print("Optimal Weights (stochastic_gradient_descent):", w_sgd)
print("Number of Epochs (stochastic_gradient_descent):", epochs_sgd)

print("Optimal Weights (mini_batch_gradient_descent):", w_mbgd)
print("Number of Epochs (mini_batch_gradient_descent):", epochs_mbgd)


Epoch 0, Loss: 45.367359627044515
Epoch 1, Loss: 43.12252411572938
Epoch 2, Loss: 40.98918946499093
Epoch 3, Loss: 38.96181634611491
Epoch 4, Loss: 37.03514062549209
Epoch 5, Loss: 35.20415969285507
Epoch 6, Loss: 33.464119468731894
Epoch 7, Loss: 31.810502057372705
Epoch 8, Loss: 30.239014013082148
Epoch 9, Loss: 28.74557518948339
Epoch 10, Loss: 27.32630814275357
Epoch 11, Loss: 25.977528061309304
Epoch 12, Loss: 24.695733195787877
Epoch 13, Loss: 23.47759576446955
Epoch 14, Loss: 22.319953310520752
Epoch 15, Loss: 21.219800488611668
Epoch 16, Loss: 20.174281259576787
Epoch 17, Loss: 19.180681472846715
Epoch 18, Loss: 18.23642181738668
Epoch 19, Loss: 17.339051122834213
Epoch 20, Loss: 16.48623999343805
Epoch 21, Loss: 15.675774758264573
Epoch 22, Loss: 14.905551721959545
Epoch 23, Loss: 14.173571701133495
Epoch 24, Loss: 13.477934832180871
Epoch 25, Loss: 12.816835637048069
Epoch 26, Loss: 12.18855833413537
Epoch 27, Loss: 11.591472382154501
Epoch 28, Loss: 11.02402824536845
Epoch 2

In [2]:
# Deterministic Gradient Descent (DGD)
def deterministic_gradient_descent(X_b, y_true, learning_rate, n_epochs, tol=1e-4):
    """
    Run full-batch gradient descent under the name "deterministic".

    Every update uses the gradient of the whole dataset, which is exactly what
    batch_gradient_descent does, so this delegates to it instead of keeping a
    second copy of the same loop. Only the update rule is deterministic: the
    starting weights are still drawn from np.random inside the delegate.

    Parameters:
    - X_b: Input data with bias term
    - y_true: True labels
    - learning_rate: Step size applied to each gradient update
    - n_epochs: Maximum number of epochs
    - tol: Stop once the loss changes by less than this between consecutive epochs

    Returns:
    - (w, epochs): flattened weight vector and the number of epochs executed
    """
    return batch_gradient_descent(X_b, y_true, learning_rate, n_epochs, tol=tol)

# Run Deterministic Gradient Descent
# Uses the same data and hyperparameters as the other gradient-descent runs.
w_dgd, epochs_dgd = deterministic_gradient_descent(
    X_b,
    y_true,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
)

# Report the fitted weights and how many epochs were executed.
print("Optimal Weights (DGD):", w_dgd)
print("Number of Epochs (DGD):", epochs_dgd)


Epoch 0, Loss: 58.15885838076257
Epoch 1, Loss: 55.295438319250835
Epoch 2, Loss: 52.57420476674391
Epoch 3, Loss: 49.98809404769272
Epoch 4, Loss: 47.530393411259695
Epoch 5, Loss: 45.194723597290334
Epoch 6, Loss: 42.97502226841157
Epoch 7, Loss: 40.86552826522794
Epoch 8, Loss: 38.860766643723515
Epoch 9, Loss: 36.95553445600968
Epoch 10, Loss: 35.14488723748886
Epoch 11, Loss: 33.42412616533955
Epoch 12, Loss: 31.78878585497105
Epoch 13, Loss: 30.234622762753496
Epoch 14, Loss: 28.757604164903192
Epoch 15, Loss: 27.35389768389977
Epoch 16, Loss: 26.019861335233678
Epoch 17, Loss: 24.75203406863374
Epoch 18, Loss: 23.547126779209023
Epoch 19, Loss: 22.40201376515942
Epoch 20, Loss: 21.313724609869414
Epoch 21, Loss: 20.279436467301515
Epoch 22, Loss: 19.29646673065324
Epoch 23, Loss: 18.362266065237236
Epoch 24, Loss: 17.474411787489597
Epoch 25, Loss: 16.63060157291077
Epoch 26, Loss: 15.828647476597638
Epoch 27, Loss: 15.066470250836984
Epoch 28, Loss: 14.342093945002475
Epoch 29,

In [13]:
import numpy as np

# Generate synthetic data
# Re-seeding here reproduces the same y = 5 * x + 4.5 + noise samples on every run of this cell.
np.random.seed(42)
X = np.random.random_sample((1000, 1))
y_true = (5 * X + 4.5) + 0.1 * np.random.standard_normal((1000, 1))

# Add a bias term to X
X_b = np.hstack((np.ones_like(X), X))

# Direct Gradient Descent (Normal Equation)
def direct_gradient_descent(X_b, y_true):
    """
    Solve the linear least-squares problem in closed form.

    Despite the name, no gradient steps are taken: the result is the
    least-squares solution that the normal equation (X^T X) w = X^T y
    describes. It is obtained with np.linalg.lstsq rather than by inverting
    X^T X explicitly, which is numerically more stable and also handles a
    rank-deficient X_b by returning the minimum-norm solution instead of
    raising.

    Parameters:
    - X_b: Input data with bias term
    - y_true: True labels

    Returns:
    - w: Optimal weights as a flat array, one entry per column of X_b
    """
    w, _residuals, _rank, _singular_values = np.linalg.lstsq(X_b, y_true, rcond=None)
    return w.flatten()

# Run Direct Gradient Descent
# Closed-form fit on the same design matrix and targets; nothing to tune.
w_direct = direct_gradient_descent(X_b=X_b, y_true=y_true)

# Report the fitted weights.
print("Optimal Weights (Direct Gradient Descent):", w_direct)


Optimal Weights (Direct Gradient Descent): [4.51747803 4.98452148]
