In [1]:
import numpy as np

# --------------------------------------
# 1. Training data (X = inputs, Y = outputs)
# --------------------------------------
# Toy dataset: house-price regression.
# Each row of X describes one house as [size in sq ft, number of bedrooms].
X = np.array(
    [
        [2104, 3],
        [1600, 2],
        [2400, 4],
        [1416, 2],
        [3000, 4],
    ],
    dtype=float,
)

# Y holds the matching prices, expressed in thousands of dollars.
Y = np.array([399.9, 329.9, 369.0, 232.0, 539.9], dtype=float)

# m = number of training examples (rows), n = number of features (columns).
m, n = X.shape

# --------------------------------------
# 2. Feature Scaling (z-score, column by column)
# --------------------------------------
# The per-column mean and standard deviation stay available as X_mean / X_std
# (e.g. to scale new inputs the same way); X itself is replaced by its
# standardized version: Z = (X - mean) / std.
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X = (X - X_mean) / X_std

# --------------------------------------
# 3. Initialize weights and bias
# --------------------------------------
W = np.zeros(n)  # one weight per feature, all starting at zero
B = 0            # bias (intercept), also starting at zero

# --------------------------------------
# 4. Define the cost function (half the Mean Squared Error)
# Formula: Cost = (1/(2m)) * Σ((prediction - actual)^2)
# --------------------------------------
def compute_cost(X, Y, W, B):
    """Return the halved mean squared error of the linear model X @ W + B.

    Args:
        X: Feature matrix; multiplied by W to produce one prediction per row.
        Y: Target values; its length is used as the number of examples.
        W: Weight vector applied to the columns of X.
        B: Bias added to every prediction.

    Returns:
        sum((X @ W + B - Y) ** 2) / (2 * len(Y)).
    """
    sample_count = len(Y)
    residuals = (X @ W + B) - Y
    # The extra factor 1/2 is what makes the gradient come out without a 2.
    scale = 1 / (2 * sample_count)
    return scale * np.sum(np.square(residuals))

# --------------------------------------
# 5. Compute gradients
# Formula:
#   dW = (1/m) * Σ((prediction - actual) * x)
#   dB = (1/m) * Σ((prediction - actual))
# --------------------------------------
def compute_gradients(X, Y, W, B):
    """Return the gradients of the halved-MSE cost with respect to W and B.

    Args:
        X: Feature matrix; multiplied by W to produce one prediction per row.
        Y: Target values; its length is used as the number of examples.
        W: Weight vector applied to the columns of X.
        B: Bias added to every prediction.

    Returns:
        A tuple (dW, dB): dW is X.T @ (prediction - Y) / len(Y) and
        dB is sum(prediction - Y) / len(Y).
    """
    sample_count = len(Y)
    residuals = (X @ W + B) - Y
    inv_m = 1 / sample_count
    grad_w = inv_m * (X.T @ residuals)
    grad_b = inv_m * residuals.sum()
    return grad_w, grad_b

# --------------------------------------
# 6. Gradient Descent Algorithm
# --------------------------------------
def gradient_descent(X, Y, W, B, learning_rate, iterations):
    """Fit W and B by batch gradient descent, logging the cost periodically.

    Args:
        X: Feature matrix passed through to compute_gradients / compute_cost.
        Y: Target values passed through to compute_gradients / compute_cost.
        W: Starting weights.
        B: Starting bias.
        learning_rate: Step size multiplied into each gradient.
        iterations: Number of update steps to perform.

    Returns:
        A tuple (W, B, cost_history) with the updated parameters and the list
        of costs recorded at iterations 0, 100, 200, ...

    Side effects:
        Prints one progress line for every recorded cost.
    """
    history = []

    for i in range(iterations):
        grad_w, grad_b = compute_gradients(X, Y, W, B)

        # Step against the gradient. New objects are bound on each step, so
        # the array the caller passed in as W is not modified.
        W, B = W - learning_rate * grad_w, B - learning_rate * grad_b

        # Only every 100th step is recorded and printed.
        if i % 100 != 0:
            continue

        # The cost is evaluated after the update made in this iteration.
        cost = compute_cost(X, Y, W, B)
        history.append(cost)
        print(f"Iteration {i:4d} | Cost: {cost:.2f}")

    return W, B, history

# --------------------------------------
# 7. Run Gradient Descent
# --------------------------------------
# Hyperparameters: step size and number of update steps.
learning_rate = 0.1
iterations = 1000
W_final, B_final, cost_history = gradient_descent(
    X, Y, W, B, learning_rate, iterations
)

# --------------------------------------
# 8. Show final parameters
# --------------------------------------
print("\nFinal weights (W):", W_final)
print("Final bias (B):", B_final)

# --------------------------------------
# 9. Make predictions
# --------------------------------------
# X was standardized above, so each prediction is made on a scaled training row.
print("\nPredictions vs Actual:")
for i, row in enumerate(X):
    prediction = row @ W_final + B_final
    print(f"Prediction: {prediction:.2f} | Actual: {Y[i]}")


Iteration    0 | Cost: 60383.47
Iteration  100 | Cost: 474.04
Iteration  200 | Cost: 261.57
Iteration  300 | Cost: 201.35
Iteration  400 | Cost: 184.28
Iteration  500 | Cost: 179.44
Iteration  600 | Cost: 178.07
Iteration  700 | Cost: 177.68
Iteration  800 | Cost: 177.57
Iteration  900 | Cost: 177.54

Final weights (W): [173.87055345 -85.33847914]
Final bias (B): 374.13999999999976

Predictions vs Actual:
Prediction: 374.14 | Actual: 399.9
Prediction: 315.49 | Actual: 329.9
Prediction: 369.21 | Actual: 369.0
Prediction: 259.25 | Actual: 232.0
Prediction: 552.61 | Actual: 539.9
