In [1]:
# Mohammadmilad Sayyad____801419978

# Problem 2(b): LR sweep for linear regression on Housing
import numpy as np, pandas as pd, torch

# --- dataset location and column roles ---
CSV_PATH = "Housing.csv"
target   = "price"
features = ["area","bedrooms","bathrooms","stories","parking"]

df = pd.read_csv(CSV_PATH)
# Inputs become an (n, 5) float32 matrix, the target an (n, 1) float32 column.
y = df[target].to_numpy().astype(np.float32).reshape(-1, 1)
X = df[features].to_numpy().astype(np.float32)

# --- reproducible 80/20 train/validation split (generator seeded with 0) ---
n = df.shape[0]
rng = np.random.default_rng(0)
idx = np.arange(n)
rng.shuffle(idx)
cut = int(n * 0.8)
tr = idx[:cut]
va = idx[cut:]
Xtr, ytr = X[tr], y[tr]
Xva, yva = X[va], y[va]

# --- z-score the inputs; mean/std are taken from the training rows only ---
mu = Xtr.mean(axis=0)
sd = Xtr.std(axis=0)
sd[sd == 0] = 1.0  # a constant column would otherwise divide by zero
Xtr_n = (Xtr - mu) / sd
Xva_n = (Xva - mu) / sd

# --- hand the standardized arrays (and raw targets) over to torch ---
Xt, yt = torch.tensor(Xtr_n), torch.tensor(ytr)
Xv, yv = torch.tensor(Xva_n), torch.tensor(yva)

# Shared mean-squared-error criterion used for training and reporting.
mse = torch.nn.MSELoss()

def r2_score_torch(y_true, y_pred):
    """Coefficient of determination R^2 = 1 - SS_res / SS_tot for torch tensors.

    Args:
        y_true: tensor of observed targets.
        y_pred: tensor of predictions. It may differ from ``y_true`` only by
            size-1 dimensions (e.g. ``(n,)`` vs ``(n, 1)``); in that case it is
            reshaped to ``y_true``'s shape before the residuals are formed.

    Returns:
        A 0-dim tensor holding R^2. Both inputs are detached, so the result
        carries no gradient. If ``y_true`` has zero variance the division is
        by zero and the result is non-finite.
    """
    y_true = y_true.detach()
    y_pred = y_pred.detach()

    # (n,) minus (n, 1) would broadcast to an (n, n) matrix and silently give a
    # wrong SS_res. Align the two only when they differ by size-1 dims, so that
    # scalar baselines still broadcast and real shape mismatches still raise.
    if y_pred.shape != y_true.shape and y_pred.squeeze().shape == y_true.squeeze().shape:
        y_pred = y_pred.reshape(y_true.shape)

    ss_res = ((y_true - y_pred) ** 2).sum()
    ss_tot = ((y_true - y_true.mean()) ** 2).sum()
    return 1.0 - ss_res / ss_tot

def train_once(lr, epochs=5000, log_every=500, seed=0, train=None, val=None):
    """Fit ``y = X @ W + b`` with full-batch SGD and log metrics periodically.

    Args:
        lr: learning rate passed to ``torch.optim.SGD``.
        epochs: number of gradient steps; each step uses the whole training set.
        log_every: a log row is recorded whenever ``epoch % log_every == 0``.
        seed: value passed to ``torch.manual_seed``. ``W`` and ``b`` start at
            zero, so the seed does not affect the initial parameters.
        train: optional ``(X, y)`` tensor pair to fit on. Defaults to the
            notebook-level ``(Xt, yt)``.
        val: optional ``(X, y)`` tensor pair to evaluate on. Defaults to the
            notebook-level ``(Xv, yv)``.

    Returns:
        A list of dicts, one per logged epoch, with keys ``lr``, ``epoch``,
        ``train_mse``, ``val_mse``, ``val_r2`` (Python floats/ints) and
        ``W``, ``b`` (detached copies of the parameters at that epoch).
    """
    # Fall back to the notebook globals so existing ``train_once(lr)`` calls
    # behave exactly as before; explicit data makes the function reusable.
    X_fit, y_fit = (Xt, yt) if train is None else train
    X_val, y_val = (Xv, yv) if val is None else val

    torch.manual_seed(seed)
    W = torch.zeros((X_fit.shape[1], 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    opt = torch.optim.SGD([W, b], lr=lr)

    logs = []
    for epoch in range(1, epochs + 1):
        loss = mse(X_fit @ W + b, y_fit)
        opt.zero_grad()
        loss.backward()
        opt.step()

        if epoch % log_every == 0:
            # Metrics are measured after the update, without tracking gradients.
            with torch.no_grad():
                val_pred = X_val @ W + b  # one forward pass feeds both val metrics
                tr_mse = mse(X_fit @ W + b, y_fit).item()
                va_mse = mse(val_pred, y_val).item()
                va_r2 = float(r2_score_torch(y_val, val_pred))
            logs.append({"lr": lr, "epoch": epoch,
                         "train_mse": tr_mse,
                         "val_mse": va_mse,
                         "val_r2": va_r2,
                         # clone so later optimizer steps cannot alter the snapshot
                         "W": W.detach().clone(), "b": b.detach().clone()})
    return logs

# Learning rates to compare; each one gets an independent training run.
lrs = [0.1, 0.01, 0.001, 0.0001]
all_logs = []
for lr in lrs:
    logs = train_once(lr)
    all_logs += logs

    # Per-run progress report, one line per logged epoch.
    print(f"\n=== Learning Rate: {lr} ===")
    for row in logs:
        report = " | ".join([
            f"Epoch {row['epoch']:4d}",
            f"Train MSE: {row['train_mse']:,.2f}",
            f"Val MSE: {row['val_mse']:,.2f}",
            f"Val R^2: {row['val_r2']:.3f}",
        ])
        print(report)

# Tabulate the scalar metrics only; the parameter snapshots stay in all_logs.
metric_records = [
    {key: value for key, value in entry.items() if key != "W" and key != "b"}
    for entry in all_logs
]
df_logs = pd.DataFrame(metric_records)
df_logs = df_logs.sort_values(["lr","epoch"])
df_logs = df_logs.reset_index(drop=True)
df_logs



=== Learning Rate: 0.1 ===
Epoch  500 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462
Epoch 1000 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462
Epoch 1500 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462
Epoch 2000 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462
Epoch 2500 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462
Epoch 3000 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462
Epoch 3500 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462
Epoch 4000 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462
Epoch 4500 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462
Epoch 5000 | Train MSE: 1,470,189,797,376.00 | Val MSE: 1,792,479,723,520.00 | Val R^2: 0.462

=== Learning Rate: 0.01 ===
Epo

Unnamed: 0,lr,epoch,train_mse,val_mse,val_r2
0,0.0001,500,21574570000000.0,21021680000000.0,-5.307843
1,0.0001,1000,17752170000000.0,17314500000000.0,-4.195452
2,0.0001,1500,14683040000000.0,14337660000000.0,-3.302211
3,0.0001,2000,12210530000000.0,11940460000000.0,-2.582898
4,0.0001,2500,10213040000000.0,10005540000000.0,-2.0023
5,0.0001,3000,8595458000000.0,8440829000000.0,-1.532786
6,0.0001,3500,7282919000000.0,7173660000000.0,-1.152555
7,0.0001,4000,6216103000000.0,6146310000000.0,-0.844285
8,0.0001,4500,5347799000000.0,5312731000000.0,-0.594158
9,0.0001,5000,4640229000000.0,4636013000000.0,-0.391099


In [2]:
# Choose the best run by highest validation R^2 at the final logged epoch.
# The final epoch is read from the logs rather than hard-coded, so the cell
# keeps working if train_once is called with a different `epochs`.
if not all_logs:
    raise ValueError("all_logs is empty; run the learning-rate sweep first")
last_epoch = max(r["epoch"] for r in all_logs)

# Drop diverged runs: comparisons against NaN are always False, so max() could
# otherwise return a NaN-scored row simply because it comes first in the list.
final_rows = [r for r in all_logs
              if r["epoch"] == last_epoch and np.isfinite(r["val_r2"])]
if not final_rows:
    raise ValueError("no run reached the final epoch with a finite validation R^2")
best = max(final_rows, key=lambda r: r["val_r2"])

W_std = best["W"].numpy().flatten()
b_std = float(best["b"].item())
# Undo the input standardization:
#   y = sum_j W_std[j] * (x[j] - mu[j]) / sd[j] + b_std
#     = sum_j (W_std[j] / sd[j]) * x[j] + (b_std - sum_j W_std[j] * mu[j] / sd[j])
W_raw = W_std / sd
b_raw = b_std - float((W_std * mu / sd).sum())

print("\n=== BEST MODEL (by final Val R^2) ===")
print(f"LR: {best['lr']} | Val R^2: {best['val_r2']:.3f} | Val MSE: {best['val_mse']:,.2f}")
print("\nParameters on standardized inputs:")
for name, w in zip(features, W_std):
    print(f"W[{name:10s}] = {w: .6f}")
print(f"B = {b_std: .6f}")

print("\nEquivalent parameters on ORIGINAL feature scale:")
for name, w in zip(features, W_raw):
    print(f"W_raw[{name:10s}] = {w:,.3f}")
print(f"B_raw = {b_raw:,.3f}")



=== BEST MODEL (by final Val R^2) ===
LR: 0.001 | Val R^2: 0.462 | Val MSE: 1,792,069,992,448.00

Parameters on standardized inputs:
W[area      ] =  712250.000000
W[bedrooms  ] =  82702.539062
W[bathrooms ] =  597250.437500
W[stories   ] =  504002.906250
W[parking   ] =  296003.687500
B =  4777034.500000

Equivalent parameters on ORIGINAL feature scale:
W_raw[area      ] = 351.383
W_raw[bedrooms  ] = 115,467.078
W_raw[bathrooms ] = 1,193,606.250
W_raw[stories   ] = 564,595.062
W_raw[parking   ] = 348,826.000
B_raw = -169,892.500
