In [4]:
import pandas as pd
import numpy as np

In [5]:
# Load the scores table from a CSV file located next to the notebook.
df = pd.read_csv("./scores.csv")

In [6]:
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,name,math,cs
0,david,92,98
1,laura,56,68
2,sanjay,88,81
3,wei,70,80
4,jeff,80,83


In [48]:
import numpy as np


def gradient_descent(x, y, iterations=100, learn_rate=0.01, tolerance=0.001, verbose=True):
    """
    Fit a simple linear regression ``y_pred = m*x + b`` by batch gradient descent.

    The loss is the mean squared error (MSE). Both parameters start at 0.0 and
    are updated once per iteration. The loop stops early as soon as the cost
    changes by less than ``tolerance`` between two consecutive iterations.

    Parameters
    ----------
    x, y : array-like of numbers
        Feature values and targets. Converted to float NumPy arrays, so plain
        lists work too. Both must be non-empty and have the same shape.
    iterations : int
        Maximum number of update steps.
    learn_rate : float
        Step size applied to each gradient.
    tolerance : float
        Early-stopping threshold on the absolute change in cost.
    verbose : bool
        When True (the default), print the parameters and cost after every
        step and a message on convergence. Pass False to silence the log.

    Returns
    -------
    tuple
        ``(m, b, cost)`` where ``m`` is the slope, ``b`` the intercept and
        ``cost`` the MSE computed in the last iteration that ran (measured
        before that iteration's parameter update). When ``iterations`` is 0,
        ``cost`` is the MSE of the initial parameters ``m = b = 0``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in shape or are empty.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        raise ValueError(
            f"x and y must have the same shape, got {x.shape} and {y.shape}"
        )
    n = x.size
    if n == 0:
        raise ValueError("x and y must contain at least one sample")

    m, b = 0.0, 0.0
    prev_cost = float('inf')
    # With m = b = 0 the residual equals y, so this is the starting cost; it
    # keeps `cost` defined for the return statement even when iterations == 0.
    cost = float(np.mean(y ** 2))

    for i in range(iterations):
        residual = y - (m * x + b)
        cost = float(np.mean(residual ** 2))

        # EARLY STOPPING: the cost has stopped moving, so further steps are not useful.
        if abs(prev_cost - cost) < tolerance:
            if verbose:
                print(f"Converged at iteration {i}!")
            break
        prev_cost = cost

        # Partial derivatives of the MSE with respect to m and b.
        m_der = -(2 / n) * np.sum(x * residual)
        b_der = -(2 / n) * np.sum(residual)

        m -= learn_rate * m_der
        b -= learn_rate * b_der

        if verbose:
            print(f"m {m:.4f}, b {b:.4f}, iteration {i}, cost {cost:.4f}")

    return m, b, cost

In [7]:
# Pull the feature column (math) and the target column (cs) out as NumPy arrays.
x, y = (np.array(df[column]) for column in ("math", "cs"))

In [50]:
# Fit the line: at most 1000 update steps with a learning rate of 1e-5.
m, b, cost = gradient_descent(x, y, iterations=1000, learn_rate=1e-5)

m 0.0989, b 0.0014, iteration 0, cost 5199.1000
m 0.1885, b 0.0027, iteration 1, cost 4266.7671
m 0.2695, b 0.0038, iteration 2, cost 3502.6548
m 0.3429, b 0.0048, iteration 3, cost 2876.4112
m 0.4094, b 0.0058, iteration 4, cost 2363.1606
m 0.4695, b 0.0066, iteration 5, cost 1942.5158
m 0.5240, b 0.0074, iteration 6, cost 1597.7678
m 0.5733, b 0.0081, iteration 7, cost 1315.2226
m 0.6179, b 0.0087, iteration 8, cost 1083.6570
m 0.6583, b 0.0093, iteration 9, cost 893.8728
m 0.6949, b 0.0098, iteration 10, cost 738.3313
m 0.7280, b 0.0103, iteration 11, cost 610.8541
m 0.7580, b 0.0107, iteration 12, cost 506.3776
m 0.7851, b 0.0111, iteration 13, cost 420.7518
m 0.8097, b 0.0115, iteration 14, cost 350.5754
m 0.8319, b 0.0118, iteration 15, cost 293.0609
m 0.8521, b 0.0121, iteration 16, cost 245.9238
m 0.8703, b 0.0123, iteration 17, cost 207.2916
m 0.8868, b 0.0126, iteration 18, cost 175.6298
m 0.9017, b 0.0128, iteration 19, cost 149.6808
m 0.9153, b 0.0130, iteration 20, cost 12

In [51]:
# Show the fitted slope, intercept, and the cost computed in the last iteration that ran.
print(m, b, cost)

1.0436576588279542 0.01488182108391937 31.81560123638189


In [58]:
# Store the fitted line's prediction for every row next to the observed scores.
df["y_pred"] = m * df["math"] + b
# Display the predictions themselves; `y` holds the observed cs scores, not the model output.
print(df["y_pred"])

0    96.031386
1    58.459711
2    91.856756
3    73.070918
4    83.507495
5    51.154107
6    67.852630
7    36.542900
8    68.896287
9    69.939945
Name: math, dtype: float64


In [61]:
# NOTE(review): this import sits mid-notebook; consider moving it to the top import cell.
from sklearn.metrics import mean_squared_error, r2_score
# Display the frame, which now carries the y_pred column alongside the observed scores.
df

Unnamed: 0,name,math,cs,y_pred
0,david,92,98,96.031386
1,laura,56,68,58.459711
2,sanjay,88,81,91.856756
3,wei,70,80,73.070918
4,jeff,80,83,83.507495
5,aamir,49,52,51.154107
6,venkat,65,66,67.85263
7,virat,35,30,36.5429
8,arthur,66,68,68.896287
9,paul,67,73,69.939945


In [62]:
# R^2 of the fitted line: observed cs scores versus the stored predictions.
r2_score(df["cs"], df["y_pred"])

0.8983819309579294

In [64]:
# RMSE: square root of the mean squared error, in the same units as the cs score.
mean_squared_error(df["cs"],df["y_pred"]) ** 0.5

5.640531999411216

In [66]:
def predict_cs(math_score, m=1.0445, b=0.0150, rmse=5.64):
    """Predict a CS score from a math score, with a +/- RMSE error band.

    Parameters
    ----------
    math_score : number or array-like supporting arithmetic
        Math score(s) to predict from. A pandas Series or NumPy array yields
        element-wise predictions.
    m, b : float
        Slope and intercept of the line ``pred = m * math_score + b``. The
        defaults are hard-coded values; pass the coefficients returned by the
        training step to use the current fit.
    rmse : float
        Half-width of the returned band. Defaults to 5.64, the rounded RMSE
        reported for the fitted line in this notebook.

    Returns
    -------
    tuple
        ``(pred, (lower, upper))`` where ``lower = pred - rmse`` and
        ``upper = pred + rmse``.

    Notes
    -----
    The band is simply the prediction plus/minus one RMSE. It is a rough
    indication of typical error, not a statistical confidence interval.
    """
    pred = m * math_score + b
    return pred, (pred - rmse, pred + rmse)

In [None]:
# R^2 of predict_cs using its built-in default coefficients (not the fitted m and b),
# scored against the observed cs column; [0] selects the predictions and drops the band.
r2_score(df["cs"], predict_cs(df["math"])[0])

0.8983940512312754