In [81]:
import pandas as pd
import numpy as np
import math
from sklearn.linear_model import LinearRegression
from tqdm import tqdm

In [82]:
# Load the score table; the 'math' column is the predictor and 'cs' the target.
df = pd.read_csv("test_scores.csv")
# Copy each column out of the frame as its own NumPy array.
X, y = (np.array(df[column]) for column in ('math', 'cs'))

In [83]:
def gradient_descent(x, y, learning_rate=0.01, max_iter=1000, tol=1e-42):
    """Fit the line ``y = m * x + b`` by batch gradient descent on the MSE.

    Parameters
    ----------
    x, y : array-like of numbers
        Samples of the predictor and the target. Both are converted to
        float arrays and flattened; they must hold the same, non-zero
        number of values.
    learning_rate : float
        Step size applied to both gradients on every iteration.
    max_iter : int
        Upper bound on the number of iterations.
    tol : float
        Optimisation stops once the absolute value of *both* gradients is
        below this threshold.

    Returns
    -------
    tuple
        ``(m, b)``, the slope and intercept. If the run is stopped because
        the cost rose or became non-finite, the parameters from before the
        offending step are returned.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` are empty or contain a different number of values.
    """
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if x.size != y.size:
        raise ValueError(
            f"x and y must have the same number of samples, got {x.size} and {y.size}"
        )
    if x.size == 0:
        raise ValueError("x and y must contain at least one sample")

    m_curr = b_curr = 0.0
    # Parameters as they were before the most recent update, so a bad step
    # can be rolled back instead of being returned to the caller.
    m_prev = b_prev = 0.0
    prev_cost = 0.0
    # Integer reporting interval (about ten reports per run). A float modulo
    # such as ``i % (max_iter / 10)`` depends on rounding and reports
    # erratically whenever max_iter is not a multiple of 10.
    report_every = max(1, math.ceil(max_iter / 10))

    for i in tqdm(range(max_iter)):
        residuals = y - (m_curr * x + b_curr)

        # mse cost function
        cost = np.mean(residuals ** 2)

        if i != 0 and cost > prev_cost:
            print(f"Cost increased at iteration {i}, stopping optimization.")
            # The last step made the fit worse: undo it.
            m_curr, b_curr = m_prev, b_prev
            break
        # NaN never compares greater than prev_cost, so without this check a
        # diverged run (or NaN in the data) would loop until max_iter.
        if not np.isfinite(cost):
            print(f"Cost is not finite at iteration {i}, stopping optimization.")
            m_curr, b_curr = m_prev, b_prev
            break
        prev_cost = cost

        # Gradients of the MSE:
        #   dm = -(2/n) * sum(x * (y - y_predicted))
        #   db = -(2/n) * sum(y - y_predicted)
        md = -2.0 * np.mean(x * residuals)
        bd = -2.0 * np.mean(residuals)

        m_prev, b_prev = m_curr, b_curr
        m_curr -= learning_rate * md
        b_curr -= learning_rate * bd

        if i % report_every == 0:
            # m and b are the freshly updated values; cost was measured
            # before this iteration's update.
            print(f"Iteration {i}: m = {m_curr}, b = {b_curr}, cost = {cost}")
        # Check for convergence
        if abs(md) < tol and abs(bd) < tol:
            print(f"Converged after {i} iterations.")
            break

    return m_curr, b_curr

In [84]:
# Fit slope and intercept with the hand-written optimiser; the small step
# size is paired with a large iteration budget.
m, b = gradient_descent(
    X,
    y,
    learning_rate=0.00007,
    max_iter=1_000_000,
    tol=1e-42,
)

  3%|▎         | 28324/1000000 [00:00<00:06, 141250.60it/s]

Iteration 0: m = 0.692426, b = 0.009786, cost = 5199.1


 11%|█▏        | 112770/1000000 [00:00<00:06, 131441.39it/s]

Iteration 100000: m = 1.0297435498235956, b = 1.064292279784712, cost = 31.64607835452838


 22%|██▏       | 216049/1000000 [00:01<00:06, 118954.13it/s]

Iteration 200000: m = 1.0231124742338216, b = 1.534219394369305, cost = 31.61284457607173


 31%|███▏      | 314920/1000000 [00:02<00:04, 140470.93it/s]

Iteration 300000: m = 1.0201434308790205, b = 1.7446277900977334, cost = 31.606181959174577


 42%|████▏     | 416339/1000000 [00:03<00:04, 142646.92it/s]

Iteration 400000: m = 1.0188140510034114, b = 1.8388374911747587, cost = 31.604846255984036


 52%|█████▏    | 516263/1000000 [00:03<00:03, 138488.37it/s]

Iteration 500000: m = 1.0182188253221103, b = 1.8810195901651325, cost = 31.60457847786609


 62%|██████▏   | 615563/1000000 [00:04<00:02, 141479.08it/s]

Iteration 600000: m = 1.0179523148850405, b = 1.8999064930518983, cost = 31.60452479444228


 72%|███████▏  | 716147/1000000 [00:05<00:02, 134700.14it/s]

Iteration 700000: m = 1.0178329856704276, b = 1.9083630447454378, cost = 31.60451403213652


 82%|████████▏ | 815195/1000000 [00:06<00:01, 138544.55it/s]

Iteration 800000: m = 1.0177795563878196, b = 1.9121494393160319, cost = 31.604511874538925


 91%|█████████▏| 914533/1000000 [00:06<00:00, 142350.35it/s]

Iteration 900000: m = 1.0177556335936464, b = 1.9138447856420389, cost = 31.60451144198966


100%|██████████| 1000000/1000000 [00:07<00:00, 133518.59it/s]


In [85]:
# Reference fit with scikit-learn's closed-form solver, used to validate the
# hand-written optimiser. Both inputs are kept two-dimensional (DataFrames),
# so coef_ is 2-D and intercept_ is 1-D.
features = df[['math']]
target = df[['cs']]
lr = LinearRegression()
lr.fit(features, target)

print(f"Linear Regression Coefficients: {lr.coef_[0][0]}, Intercept: {lr.intercept_[0]}")
print(f"Custom Gradient Descent Coefficients: {m}, Intercept: {b}")

# Only the slope is compared; the intercept is printed but not asserted.
slopes_agree = math.isclose(m, lr.coef_[0][0], rel_tol=1e-5)
assert slopes_agree, "Coefficients do not match!"

Linear Regression Coefficients: 1.0177362378569323, Intercept: 1.9152193111569318
Custom Gradient Descent Coefficients: 1.0177449223071622, Intercept: 1.9146038667115937
