## Initialize a small dataset

In [None]:
import numpy as np
import pandas as pd

# Toy dataset: four records, each with an education level, an age and a salary
education_level = [1, 3, 4, 3]
Age = [20, 64, 55, 35]
Salary = [45, 60, 70, 90]

# Assemble the records row by row; the column order matches the lists above
df = pd.DataFrame(
    list(zip(education_level, Age, Salary)),
    columns=['Education Level', 'Age', 'Salary'],
)

# Last expression of the cell: renders the frame as the cell output
df

Unnamed: 0,Education Level,Age,Salary
0,1,20,45
1,3,64,60
2,4,55,70
3,3,35,90


In [None]:
# Split the frame into a feature matrix X and a target vector y
feature_columns = ['Education Level', 'Age']
X = df[feature_columns].to_numpy()
y = df['Salary'].to_numpy()

# Standardize every feature column (zero mean, unit standard deviation).
# The statistics are kept so the same scaling can be re-applied later.
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X = (X - X_mean) / X_std

# Standardize the target the same way; y itself is left in its original scale
y_mean = y.mean()
y_std = y.std()
y_normalized = (y - y_mean) / y_std

## SGD

In [None]:
# Build the design matrix: a leading column of ones (bias term) followed by
# the standardized features. It is rebuilt from df under its own name rather
# than prepended to X in place, so running this cell repeatedly, or after
# something else has already prepended a bias column to X, cannot produce
# duplicate bias columns.
features = (df[['Education Level', 'Age']].values - X_mean) / X_std
X_b = np.c_[np.ones(features.shape[0]), features]

# Initialize parameters (weights) from a seeded generator so that a fresh
# "Restart & Run All" reproduces the same trajectory
rng = np.random.default_rng(42)
theta = rng.standard_normal(X_b.shape[1])

# Hyperparameters
learning_rate = 0.01
epochs = 500

# Stochastic Gradient Descent: one parameter update per data point
for epoch in range(epochs):
    for x_i, y_i in zip(X_b, y):
        # Linear prediction for this sample
        prediction = np.dot(x_i, theta)

        # Residual against the (unnormalized) target
        error = prediction - y_i

        # error * x_i is the gradient of 0.5 * error**2 with respect to theta
        gradient = error * x_i
        theta -= learning_rate * gradient  # Update rule

    # Report the mean squared error over the whole dataset every 10 epochs
    if epoch % 10 == 0:
        loss = np.mean((np.dot(X_b, theta) - y) ** 2)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Final Weights
print("Final Weights (theta):", theta)

# Making Predictions
predictions = np.dot(X_b, theta)
print("Predicted Salaries:", predictions)


Epoch 0, Loss: 4521.3916
Epoch 10, Loss: 2099.1228
Epoch 20, Loss: 1024.4783
Epoch 30, Loss: 541.9913
Epoch 40, Loss: 321.8562
Epoch 50, Loss: 218.7950
Epoch 60, Loss: 168.4644
Epoch 70, Loss: 142.2430
Epoch 80, Loss: 127.3324
Epoch 90, Loss: 117.9618
Epoch 100, Loss: 111.4899
Epoch 110, Loss: 106.6755
Epoch 120, Loss: 102.9094
Epoch 130, Loss: 99.8713
Epoch 140, Loss: 97.3770
Epoch 150, Loss: 95.3096
Epoch 160, Loss: 93.5871
Epoch 170, Loss: 92.1481
Epoch 180, Loss: 90.9445
Epoch 190, Loss: 89.9369
Epoch 200, Loss: 89.0933
Epoch 210, Loss: 88.3869
Epoch 220, Loss: 87.7954
Epoch 230, Loss: 87.3002
Epoch 240, Loss: 86.8856
Epoch 250, Loss: 86.5385
Epoch 260, Loss: 86.2480
Epoch 270, Loss: 86.0050
Epoch 280, Loss: 85.8016
Epoch 290, Loss: 85.6316
Epoch 300, Loss: 85.4893
Epoch 310, Loss: 85.3704
Epoch 320, Loss: 85.2711
Epoch 330, Loss: 85.1880
Epoch 340, Loss: 85.1187
Epoch 350, Loss: 85.0608
Epoch 360, Loss: 85.0125
Epoch 370, Loss: 84.9722
Epoch 380, Loss: 84.9386
Epoch 390, Loss: 84.

## SGD with Momentum

In [None]:
# Build the design matrix: a leading column of ones (bias term) followed by
# the standardized features. It is rebuilt from df under its own name rather
# than prepended to X in place: X may already carry a bias column by the time
# this cell runs, and prepending again yields a redundant second bias weight.
features = (df[['Education Level', 'Age']].values - X_mean) / X_std
X_b = np.c_[np.ones(features.shape[0]), features]

# Initialize parameters (weights) from a seeded generator so that a fresh
# "Restart & Run All" reproduces the same trajectory, and start with zero velocity
rng = np.random.default_rng(42)
theta = rng.standard_normal(X_b.shape[1])
velocity = np.zeros(X_b.shape[1])

# Hyperparameters
learning_rate = 0.01
momentum = 0.9  # Fraction of the previous velocity carried into the next step
epochs = 100

# SGD with Momentum: one parameter update per data point
for epoch in range(epochs):
    for x_i, y_i in zip(X_b, y):
        # Linear prediction for this sample
        prediction = np.dot(x_i, theta)

        # Residual against the (unnormalized) target
        error = prediction - y_i

        # error * x_i is the gradient of 0.5 * error**2 with respect to theta
        gradient = error * x_i

        # New velocity = decayed previous velocity minus the scaled gradient step
        velocity = momentum * velocity - learning_rate * gradient

        # Move the weights along the velocity
        theta += velocity

    # Report the mean squared error over the whole dataset every 10 epochs
    if epoch % 10 == 0:
        loss = np.mean((np.dot(X_b, theta) - y) ** 2)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Final Weights
print("Final Weights (theta):", theta)

# Making Predictions
predictions = np.dot(X_b, theta)
print("Predicted Salaries:", predictions)


Epoch 0, Loss: 3187.1703
Epoch 10, Loss: 142.5843
Epoch 20, Loss: 87.3918
Epoch 30, Loss: 84.9955
Epoch 40, Loss: 84.7953
Epoch 50, Loss: 84.7786
Epoch 60, Loss: 84.7770
Epoch 70, Loss: 84.7768
Epoch 80, Loss: 84.7767
Epoch 90, Loss: 84.7767
Final Weights (theta): [ 33.38020425  32.83819587  21.06289257 -13.44239408]
Predicted Salaries: [50.80890339 54.98587281 81.36729773 77.71152655]


## Adam

In [None]:
# Build the design matrix: a leading column of ones (bias term) followed by
# the standardized features. It is rebuilt from df under its own name rather
# than prepended to X in place: X may already carry bias columns by the time
# this cell runs, and prepending again yields redundant bias weights.
features = (df[['Education Level', 'Age']].values - X_mean) / X_std
X_b = np.c_[np.ones(features.shape[0]), features]

# Initialize parameters from a seeded generator so that a fresh
# "Restart & Run All" reproduces the same trajectory
rng = np.random.default_rng(42)
theta = rng.standard_normal(X_b.shape[1])

# Hyperparameters
learning_rate = 0.01
beta1 = 0.9      # Decay rate of the first-moment estimate
beta2 = 0.999    # Decay rate of the second-moment estimate
epsilon = 1e-8   # Keeps the denominator of the update away from zero
epochs = 1000

# Initialize moments
m = np.zeros_like(theta)  # First moment (running mean of gradients)
v = np.zeros_like(theta)  # Second moment (running mean of squared gradients)
t = 0                     # Number of parameter updates performed so far

# Adam Optimization: one parameter update per data point
for epoch in range(epochs):
    for x_i, y_i in zip(X_b, y_normalized):
        # The bias correction must use the count of updates, not the epoch
        # index: m and v are refreshed once per sample, so t advances here.
        t += 1

        # Calculate prediction and error against the standardized target
        prediction = np.dot(x_i, theta)
        error = prediction - y_i

        # error * x_i is the gradient of 0.5 * error**2 with respect to theta
        gradient = error * x_i

        # Update moments
        m = beta1 * m + (1 - beta1) * gradient
        v = beta2 * v + (1 - beta2) * (gradient ** 2)

        # Correct the bias that comes from initializing m and v at zero
        m_hat = m / (1 - beta1 ** t)
        v_hat = v / (1 - beta2 ** t)

        # Update weights
        theta -= learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)

    # Report the mean squared error (in standardized units) every 10 epochs
    if epoch % 10 == 0:
        loss = np.mean((np.dot(X_b, theta) - y_normalized) ** 2)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Denormalize the predictions back to the original target scale
predictions_normalized = np.dot(X_b, theta)
predictions = predictions_normalized * y_std + y_mean

# Output final weights and denormalized predictions
print("\nFinal Weights (theta):", theta)
print("Denormalized Predictions (Salary):", predictions)


Epoch 0, Loss: 4.0334
Epoch 10, Loss: 1.5389
Epoch 20, Loss: 0.9638
Epoch 30, Loss: 0.7353
Epoch 40, Loss: 0.6242
Epoch 50, Loss: 0.5625
Epoch 60, Loss: 0.5240
Epoch 70, Loss: 0.4974
Epoch 80, Loss: 0.4775
Epoch 90, Loss: 0.4615
Epoch 100, Loss: 0.4478
Epoch 110, Loss: 0.4356
Epoch 120, Loss: 0.4245
Epoch 130, Loss: 0.4141
Epoch 140, Loss: 0.4044
Epoch 150, Loss: 0.3954
Epoch 160, Loss: 0.3870
Epoch 170, Loss: 0.3793
Epoch 180, Loss: 0.3722
Epoch 190, Loss: 0.3657
Epoch 200, Loss: 0.3598
Epoch 210, Loss: 0.3544
Epoch 220, Loss: 0.3497
Epoch 230, Loss: 0.3454
Epoch 240, Loss: 0.3416
Epoch 250, Loss: 0.3382
Epoch 260, Loss: 0.3352
Epoch 270, Loss: 0.3326
Epoch 280, Loss: 0.3303
Epoch 290, Loss: 0.3284
Epoch 300, Loss: 0.3267
Epoch 310, Loss: 0.3252
Epoch 320, Loss: 0.3239
Epoch 330, Loss: 0.3228
Epoch 340, Loss: 0.3219
Epoch 350, Loss: 0.3211
Epoch 360, Loss: 0.3205
Epoch 370, Loss: 0.3199
Epoch 380, Loss: 0.3194
Epoch 390, Loss: 0.3191
Epoch 400, Loss: 0.3187
Epoch 410, Loss: 0.3185
Epo