## Cost Function

A cost function measures how far the model's predictions are from the actual values (here we use Mean Squared Error, MSE). An optimizer such as gradient descent then searches for the weight and bias that minimize this cost.

In [5]:
import numpy as np
import matplotlib.pyplot as plt


# Actual values (ground truth) for four example data points
y_true = np.array([3.0, -0.5, 2.0, 7.0])

# Values predicted by the model for the same four data points
y_pred = np.array([2.5, 0.0, 2.1, 7.8])

# MSE function
def mean_squared_error(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Both inputs are converted to float arrays first, so plain Python
    lists/tuples are accepted and narrow integer arrays (e.g. int32) cannot
    silently overflow when the differences are squared.

    Args:
        y_true: Array-like of actual (ground-truth) values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The mean of the squared element-wise differences.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean((y_true - y_pred) ** 2)

# Calculate the MSE of the example arrays and print it to four decimals
mse = mean_squared_error(y_true, y_pred)
print(f"Mean Squared Error: {mse:.4f}")




# Per-example error and squared error (one value per data point)
errors = y_true - y_pred
squared_errors = errors ** 2

# Bar chart with one bar per data point, so large individual errors stand out
plt.bar(range(len(y_true)), squared_errors, color='skyblue')
plt.title("Squared Errors per Example")
plt.xlabel("Data Point")
plt.ylabel("Squared Error")
plt.show()


ModuleNotFoundError: No module named 'matplotlib'

## Test Lab: Linear Regression with One Feature (Two Parameters: w and b)



```
Prediction:  ŷ = wx + b
Cost:        MSE = (1/n) * Σ(y - ŷ)^2
Goal:        Find best w and b that minimize MSE
```



In [None]:
# 📍 Step 1: Import Libraries
import numpy as np
import matplotlib.pyplot as plt

# 📍 Step 2: Simulate Some Data
# Let's say this is house size (in square feet)
X = np.array([500, 750, 1000, 1250, 1500])  # input feature
y = np.array([150000, 220000, 250000, 320000, 370000])  # price in $
# Alternative, perfectly linear targets (easy to predict because the pattern is obvious):
# y_easy = np.array([150000, 200000, 250000, 300000, 350000])  # price in $

# Visualize the raw data: one point per house, size on x and price on y
plt.scatter(X, y, color='blue')
plt.xlabel("Size (sqft)")
plt.ylabel("Price ($)")
plt.title("House Price vs Size")
plt.show()

## Using manual parameter tuning

In [None]:
# 📍 Step 3: Define the Prediction Function
# y_pred = w * x + b

def predict(X, w, b):
    """Return the linear model's prediction ``w * X + b``.

    Args:
        X: Input feature value(s): a scalar, a NumPy array, or a plain
            list/tuple of numbers.
        w: Weight (slope) multiplied with ``X``.
        b: Bias (intercept) added to the weighted input.

    Returns:
        The element-wise prediction(s) ``w * X + b``.
    """
    # A plain list would be repeated by an int ``w`` (sequence repetition)
    # and rejected by a float ``w``; convert so the math is element-wise.
    if isinstance(X, (list, tuple)):
        X = np.asarray(X)
    return w * X + b

# 📍 Step 4: Define the Cost Function (MSE)

def mean_squared_error(y_true, y_pred):
    """Return the mean squared error (MSE) between targets and predictions.

    Inputs are coerced to float arrays before subtracting. This lets callers
    pass plain lists/tuples and prevents silent wrap-around when narrow
    integer arrays (e.g. int32 prices) are squared.

    Args:
        y_true: Array-like of actual (ground-truth) values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The mean of the squared element-wise differences.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.mean((actual - predicted) ** 2)

# 📍 Step 5: Try a Sample Prediction and Compute Cost

# Pick a weight and bias by hand. These are the model's parameters: they can be
# tuned manually like this, or the best values can be found automatically
# using gradient descent.

w = 200  # $200 per sqft
b = 50000  # base price

# Predict a price for every house in X and score the guess with MSE
y_pred = predict(X, w, b)
mse = mean_squared_error(y, y_pred)

print(f"Predicted Prices: {y_pred}")
print(f"Mean Squared Error: {mse:.2f}")


# 📍 Step 6: Plot Prediction Line vs True Data
# Blue points are the actual prices; the red line is the hand-tuned model
plt.scatter(X, y, color='blue', label='Actual')
plt.plot(X, y_pred, color='red', label='Prediction')
plt.xlabel("Size (sqft)")
plt.ylabel("Price ($)")
plt.title("Prediction vs Actual")
plt.legend()
plt.show()


In [None]:
# New square footage values, larger than any size in X
X_new = np.array([2200, 3300, 3500])
# Original and new sizes together, used below to draw the full prediction line
X_combined = np.concatenate((X, X_new))

print(X_combined)

# Use the predict function with the current w and b to get predictions for the new sizes
y_pred_new = predict(X_new, w, b)

print(f"New Square Footage Values: {X_new}")
print(f"Predicted Prices for New Sizes: {y_pred_new}")

# Visualize the new predictions alongside the original data
plt.scatter(X, y, color='blue', label='Actual (Original)')
plt.scatter(X_new, y_pred_new, color='green', label='Predicted (New)') # Plot new predictions
plt.plot(X_combined, predict(X_combined, w, b), color='red', label='Prediction Line') # Plot the full line
plt.xlabel("Size (sqft)")
plt.ylabel("Price ($)")
plt.title("Prediction vs Actual (with New Predictions)")
plt.legend()
plt.show()

## Optimizer Implementation using Gradient Descent

With gradient descent, the weight and bias are learned automatically; the only hyperparameters you need to tune are the learning rate and the number of epochs (iterations).

In [None]:
# Gradient computation
def compute_gradients(x, y, y_pred):
    """Return the gradients of the MSE cost with respect to ``w`` and ``b``.

    For predictions ``y_pred = w * x + b`` and cost
    ``mean((y - y_pred) ** 2)`` the partial derivatives are::

        dw = (-2 / n) * sum(x * (y - y_pred))
        db = (-2 / n) * sum(y - y_pred)

    Inputs are converted to float arrays so plain lists are accepted and
    integer inputs cannot overflow in the products.

    Args:
        x: Array-like of input feature values.
        y: Array-like of actual target values.
        y_pred: Array-like of the model's current predictions.

    Returns:
        A ``(dw, db)`` tuple of gradients.
    """
    x = np.asarray(x, dtype=float)
    # The residual appears in both gradients, so compute it once.
    residual = np.asarray(y, dtype=float) - np.asarray(y_pred, dtype=float)
    n = len(x)
    dw = (-2 / n) * np.sum(x * residual)
    db = (-2 / n) * np.sum(residual)
    return dw, db

def normalize(array):
    """Standardize values to mean 0 and standard deviation 1.

    Args:
        array: Array-like of numbers.

    Returns:
        A float NumPy array ``(array - mean) / std``. If every value is
        identical (``std == 0``) the result is all zeros instead of NaN.
    """
    values = np.asarray(array, dtype=float)
    centered = values - np.mean(values)
    spread = np.std(values)
    if spread == 0:
        # Constant input: dividing would give 0/0 = NaN and poison training.
        return np.zeros_like(centered)
    return centered / spread

# ----------------------------
# Training Function
# ----------------------------

def train(x, y, learning_rate=0.01, epochs=100):
    """Fit ``w`` and ``b`` with batch gradient descent.

    Both parameters start at 0.0. Each epoch computes predictions and the
    cost with the current parameters, then moves the parameters one step
    against the gradient. Progress is printed on every 100th epoch.

    Args:
        x: Input feature values.
        y: Target values.
        learning_rate: Step size applied to each gradient.
        epochs: Number of update steps to run.

    Returns:
        A ``(w, b, cost_history)`` tuple, where ``cost_history`` holds the
        cost measured at the start of each epoch (before that epoch's update).
    """
    w = 0.0
    b = 0.0
    cost_history = []

    for epoch in range(epochs):
        # Forward pass and cost with the current parameters.
        predictions = predict(x, w, b)
        cost = mean_squared_error(y, predictions)
        cost_history.append(cost)

        # Step against the gradient.
        grad_w, grad_b = compute_gradients(x, y, predictions)
        w -= learning_rate * grad_w
        b -= learning_rate * grad_b

        if epoch % 100 != 0:
            continue
        print(f"Epoch {epoch}: Cost = {cost:.4f}, w = {w:.4f}, b = {b:.4f}")

    return w, b, cost_history


# ----------------------------
# Main Execution
# ----------------------------

# Normalize input and output so both have mean 0 and std 1
X_norm = normalize(X)
y_norm = normalize(y)

# Train the model on the normalized data (this rebinds the module-level w and b)
w, b, costs = train(X_norm, y_norm, learning_rate=0.01, epochs=1000)

# w and b describe the fit in normalized units, not dollars per sqft
print(f"\n🎯 Final model: y = {w:.4f}x + {b:.4f}")

# ----------------------------
# Visualize Cost Over Time
# ----------------------------

# One cost value was recorded per epoch, so the x-axis is the epoch index
plt.figure(figsize=(8, 4))
plt.plot(costs)
plt.title("Cost over Epochs")
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.grid(True)
plt.show()

# ----------------------------
# Visualize Fit
# ----------------------------

# Normalized data points with the trained line drawn over them
plt.figure(figsize=(8, 4))
plt.scatter(X_norm, y_norm, label='Data')
plt.plot(X_norm, predict(X_norm, w, b), color='red', label='Model')
plt.title("Linear Fit After Training")
plt.xlabel("Normalized X")
plt.ylabel("Normalized y")
plt.legend()
plt.grid(True)
plt.show()

## The approach above works, but it always runs every epoch, which wastes computation once the cost has stopped improving. In the next step I implement "early stopping", so training stops as soon as the `cost` is no longer improving meaningfully.

In [None]:
# Gradient computation
def compute_gradients(x, y, y_pred):
    """Return the MSE gradients ``(dw, db)`` for the current predictions.

    With cost ``mean((y - y_pred) ** 2)`` and ``y_pred = w * x + b``::

        dw = (-2 / n) * sum(x * (y - y_pred))
        db = (-2 / n) * sum(y - y_pred)

    All inputs are coerced to float arrays, so plain lists work and integer
    inputs cannot overflow when multiplied.

    Args:
        x: Array-like of input feature values.
        y: Array-like of actual target values.
        y_pred: Array-like of the model's current predictions.

    Returns:
        A ``(dw, db)`` tuple of gradients.
    """
    features = np.asarray(x, dtype=float)
    # Shared by both gradients; computing it once avoids a repeated subtraction.
    residual = np.asarray(y, dtype=float) - np.asarray(y_pred, dtype=float)
    scale = -2 / len(features)
    dw = scale * np.sum(features * residual)
    db = scale * np.sum(residual)
    return dw, db

def normalize(array):
    """Normalize to mean 0 and std 1.

    Args:
        array: Array-like of numbers.

    Returns:
        A float NumPy array ``(array - mean) / std``. A constant input
        (``std == 0``) yields all zeros rather than NaN from 0/0.
    """
    data = np.asarray(array, dtype=float)
    deviation = data - np.mean(data)
    std = np.std(data)
    # Guard the division: a constant array has no spread to scale by.
    if std == 0:
        return np.zeros_like(deviation)
    return deviation / std

# ----------------------------
# Training Function
# ----------------------------

def train(x, y, learning_rate=0.01, epochs=100, early_stop_threshold=1e-6, patience=10):
    """Fit ``w`` and ``b`` with gradient descent and patience-based early stopping.

    Both parameters start at 0.0. Each epoch measures the cost with the
    current parameters and then applies one gradient step. An epoch counts as
    stalled when the cost is not at least ``early_stop_threshold`` below the
    best cost recorded so far; any epoch that does improve that much becomes
    the new best and resets the stall counter. Training stops once
    ``patience`` stalled epochs have accumulated since the last reset.
    Progress is printed on every 10th epoch.

    Args:
        x: Input feature values.
        y: Target values.
        learning_rate: Step size applied to each gradient.
        epochs: Maximum number of update steps.
        early_stop_threshold: Minimum drop below the best cost that counts
            as an improvement.
        patience: Number of stalled epochs tolerated before stopping.

    Returns:
        A ``(w, b, cost_history)`` tuple, where ``cost_history`` holds the
        cost measured at the start of every epoch that ran.
    """
    w, b = 0.0, 0.0
    cost_history = []
    lowest_cost = float('inf')
    stalled_epochs = 0

    for epoch in range(epochs):
        # Forward pass and cost with the current parameters.
        predictions = predict(x, w, b)
        cost = mean_squared_error(y, predictions)
        cost_history.append(cost)

        # Gradient step (applied before the stopping decision).
        grad_w, grad_b = compute_gradients(x, y, predictions)
        w -= learning_rate * grad_w
        b -= learning_rate * grad_b

        # Written as a negated "<" so a NaN cost is treated as in a plain
        # "<" test (comparison is False, so the epoch counts as improved).
        improved = not (lowest_cost - cost < early_stop_threshold)
        if improved:
            lowest_cost = cost
            stalled_epochs = 0
        else:
            stalled_epochs += 1
            if stalled_epochs >= patience:
                print(f"⏹️ Early stopping at epoch {epoch}")
                print(f"📉 Cost = {cost:.6f}, w = {w:.4f}, b = {b:.4f}")
                break

        # Periodic progress log.
        if epoch % 10 == 0:
            print(f"📉 Epoch {epoch}: Cost = {cost:.6f}, w = {w:.4f}, b = {b:.4f}")

    return w, b, cost_history




# ----------------------------
# Main Execution
# ----------------------------

# Normalize input and output so both have mean 0 and std 1
X_norm = normalize(X)
y_norm = normalize(y)

# Train the model with up to 1000 epochs; stop early after 5 epochs that
# fail to improve the best cost by at least 1e-5
w, b, costs = train(X_norm, y_norm, learning_rate=0.01, epochs=1000, early_stop_threshold=1e-5, patience=5)


# w and b describe the fit in normalized units, not dollars per sqft
print(f"\n🎯 Final model: y = {w:.4f}x + {b:.4f}")

# ----------------------------
# Visualize Cost Over Time
# ----------------------------

# costs has one entry per epoch that actually ran, so the curve ends at the stop point
plt.figure(figsize=(8, 4))
plt.plot(costs)
plt.title("Cost over Epochs")
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.grid(True)
plt.show()

# ----------------------------
# Visualize Fit
# ----------------------------

# Normalized data points with the trained line drawn over them
plt.figure(figsize=(8, 4))
plt.scatter(X_norm, y_norm, label='Data')
plt.plot(X_norm, predict(X_norm, w, b), color='red', label='Model')
plt.title("Linear Fit After Training")
plt.xlabel("Normalized X")
plt.ylabel("Normalized y")
plt.legend()
plt.grid(True)
plt.show()