In [110]:
import numpy as np
import matplotlib.pyplot as plt

## From Single-Feature to Multi-Feature Linear Regression

In single-feature linear regression, each example has one input value and the model is:

$$
f_{w,b}(x) = wx + b
$$

Gradients are computed using simple loops over the training examples.

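In multi-feature linear regression, each example $\mathbf{x}^{(i)}$ is a vector of $n$ input features, and the model has one weight per feature.  
The model becomes:

$$
f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x}^{(i)} + b
$$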

To handle multiple features efficiently, the implementation uses
vectorized matrix operations instead of explicit loops.

Vectorization makes the code simpler and much faster while producing the same
results as the loop-based approach.


In [111]:
import numpy as np

def zscore_normalize_features(X):
    """Standardize each feature to zero mean and unit standard deviation.

    Statistics are computed along axis 0, i.e. per column for a 2-D
    ``(m, n)`` feature matrix. A 1-D array is treated as a single feature.

    Args:
        X: array-like of numeric values, 1-D or 2-D. It is not modified.

    Returns:
        numpy.ndarray with the same shape as ``X`` holding
        ``(X - mean) / std``. Features whose standard deviation is zero
        (constant columns) are only mean-centered, so they come out as zeros
        instead of NaN/inf.
    """
    X = np.asarray(X)
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)

    # Guard against division by zero for constant features. np.where (rather
    # than boolean item assignment) also works when sigma is a scalar, which
    # is what np.std returns for 1-D input.
    sigma = np.where(sigma == 0, 1.0, sigma)

    return (X - mu) / sigma


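Z-score normalization rescales every feature to zero mean and unit standard deviation. With all features on a comparable scale, gradient descent converges faster.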

In [112]:
def compute_cost(X, y, w, b):
    """Return the squared-error cost of the linear model ``X @ w + b``.

    Args:
        X: feature matrix with one row per training example.
        y: target values, one per row of ``X``.
        w: weight vector, one entry per column of ``X``.
        b: scalar bias.

    Returns:
        The sum of squared residuals divided by ``2 * m``, where ``m`` is the
        number of rows in ``X``.
    """
    n_examples = X.shape[0]
    scale = 1 / (2 * n_examples)

    # Residual between the model's predictions and the targets.
    residual = (X @ w + b) - y

    return scale * np.sum(residual ** 2)

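`X @ w` is the matrix–vector product of the feature matrix `X` (one row per example) and the weight vector `w`; it produces one prediction per training example in a single operation.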

In [113]:
def compute_gradient(X, y, w, b):
    """Return the gradient of the squared-error cost for ``X @ w + b``.

    Args:
        X: feature matrix with one row per training example.
        y: target values, one per row of ``X``.
        w: weight vector, one entry per column of ``X``.
        b: scalar bias.

    Returns:
        A tuple ``(dj_dw, dj_db)``: the partial derivatives of the cost with
        respect to the weights (one entry per feature) and to the bias.
    """
    inv_m = 1 / X.shape[0]

    # Prediction error for every example at the current parameters.
    residual = (X @ w + b) - y

    grad_w = inv_m * (X.T @ residual)
    grad_b = inv_m * np.sum(residual)
    return grad_w, grad_b


In [114]:
def gradient_descent(X, y, w_in, b_in, alpha, num_iters):
    """Fit weights and bias with a fixed number of gradient-descent steps.

    Args:
        X: feature matrix with one row per training example.
        y: target values, one per row of ``X``.
        w_in: initial weight array; it is copied, not modified.
        b_in: initial scalar bias.
        alpha: learning rate applied to both gradients.
        num_iters: number of update steps to perform.

    Returns:
        A tuple ``(w, b)`` with the parameters after ``num_iters`` updates.
    """
    # Work on a copy so the caller's initial weights stay untouched.
    weights, bias = w_in.copy(), b_in

    for _ in range(num_iters):
        # Both gradients are evaluated at the current parameters before
        # either parameter is updated.
        grad_w, grad_b = compute_gradient(X, y, weights, bias)

        weights = weights - alpha * grad_w
        bias = bias - alpha * grad_b

    return weights, bias


In [115]:
from sklearn.datasets import load_diabetes

# Load the diabetes regression dataset from scikit-learn as a
# (features, targets) pair, then print both shapes as a sanity check.
X_train, y_train = load_diabetes(return_X_y=True)
print(X_train.shape, y_train.shape)

(442, 10) (442,)


In [116]:
# Dataset dimensions: m training examples, n features.
m, n = X_train.shape

# Gradient-descent hyperparameters.
alpha = 0.1
iters = 3000

# Start from all-zero parameters.
w_init = np.zeros(n)
b_init = 0.0

# Standardize the features before training. X_train is overwritten, so every
# later use of X_train sees the standardized features.
X_train = zscore_normalize_features(X_train)

w_final, b_final = gradient_descent(X_train, y_train, w_init, b_init, alpha, iters)

print("w_final:", w_final)
print("b_final:", b_final)


w_final: [ -0.46334361 -11.39253135  24.75884834  15.41678469 -34.90023404
  20.47050174   3.56291498   8.06723443  34.69870704   3.22689774]
b_final: 152.1334841628958


In [117]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Reference fit from scikit-learn on the same (already standardized) features.
sk = LinearRegression().fit(X_train, y_train)

# Training-set predictions from both models.
y_pred_sk = sk.predict(X_train)
y_pred_yours = X_train @ w_final + b_final

# Mean squared error of each model on the training data.
mse_yours = np.mean((y_pred_yours - y_train)**2)
mse_sk = np.mean((y_pred_sk - y_train)**2)
print("MSE yours:", mse_yours)
print("MSE sk   :", mse_sk)

# Learned parameters side by side.
print("Your w, b:", w_final, b_final)
print("Sk w, b  :", sk.coef_, sk.intercept_)

MSE yours: 2859.827649465075
MSE sk   : 2859.69634758675
Your w, b: [ -0.46334361 -11.39253135  24.75884834  15.41678469 -34.90023404
  20.47050174   3.56291498   8.06723443  34.69870704   3.22689774] 152.1334841628958
Sk w, b  : [ -0.47612079 -11.40686692  24.72654886  15.42940413 -37.67995261
  22.67616277   4.80613814   8.42203936  35.73444577   3.21667372] 152.13348416289594
