# Linear Regression 
---
---

## The Formula 

$$
\hat{y} = XW + b
$$

---

## Shapes

| Component | Meaning | Shape |
|---------|--------|-------|
| X | Feature matrix | (m, n) |
| W | Weight vector | (n, 1) |
| b | Bias | scalar |
| ŷ | Prediction | (m, 1) |

- **m** = number of rows (data points)  
- **n** = number of features  

---

## What Is Linear Regression Trying to Do?

Learn **W** and **b** such that predicted values (**ŷ**) are as close as possible to actual values (**y**).

---

## Error Measure (Loss Function)

### Mean Squared Error (MSE)

$$
\text{MSE} = \frac{1}{m} \sum_{i=1}^{m} (y_i - \hat{y}_i)^2
$$

---

## How Does the Model Learn?


### Gradient Descent

1. Initialize weights **W** and bias **b**
2. Predict **ŷ**
3. Compute the error (MSE) and its gradients **dW** and **db**
4. Update **W** and **b**
5. Repeat steps 2–4 until convergence (or for a fixed number of epochs)



### Update Rule

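$$
W = W - \alpha \cdot dW
$$
$$
b = b - \alpha \cdot db
$$

where $\alpha$ is the learning rate and **dW**, **db** are the gradients of the MSE with respect to **W** and **b**:

$$
dW = \frac{2}{m} X^T (\hat{y} - y), \qquad db = \frac{2}{m} \sum_{i=1}^{m} (\hat{y}_i - y_i)
$$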

---

## One-line Definition

**Linear Regression models a linear relationship between features and a continuous target by minimizing squared error using gradient descent.**

---


In [0]:
import pandas as pd
import numpy as np

In [0]:
def initialize_parameters(n_features):
    """
    Create the starting weights and bias for Linear Regression.

    Parameters:
    n_features (int): Number of input features (columns in X)

    Returns:
    W (numpy.ndarray): Column vector of shape (n_features, 1) filled with
        random values drawn by np.random.rand (uniform over [0, 1))
    b (float): Bias term, always 0.0
    """
    # One random weight per feature, laid out as a column vector
    weight_shape = (n_features, 1)
    initial_weights = np.random.rand(*weight_shape)

    # The bias is a plain scalar and starts at zero
    initial_bias = 0.0

    return initial_weights, initial_bias


In [0]:
def predict(W, b, X):
    """
    Apply the linear hypothesis y_hat = XW + b.

    Parameters:
    W (numpy.ndarray): Weight vector of shape (n_features, 1)
    b (float): Bias term
    X (numpy.ndarray): Feature matrix of shape (m, n_features)

    Returns:
    numpy.ndarray: Predicted values of shape (m, 1)
    """
    # Matrix product first; the scalar bias is then added to every row
    weighted_sum = np.dot(X, W)
    return weighted_sum + b


In [0]:
def mean_squared_error(y, y_pred):
    """
    Compute Mean Squared Error (MSE).

    Parameters:
    y (array-like): Actual values, shape (m, 1) or (m,)
    y_pred (array-like): Predicted values, shape (m, 1) or (m,)

    Returns:
    float: Mean squared error
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)

    # A (m,) vector minus a (m, 1) column broadcasts to an (m, m) matrix and
    # silently produces a wrong loss, so give both the same layout first.
    if y.shape != y_pred.shape and y.size == y_pred.size:
        y_pred = y_pred.reshape(y.shape)

    # Mean of the element-wise squared differences
    return np.mean((y - y_pred) ** 2)


In [0]:
def root_mean_squared_error(y, y_pred):
    """
    Compute Root Mean Squared Error (RMSE).

    Parameters:
    y (array-like): Actual values, shape (m, 1) or (m,)
    y_pred (array-like): Predicted values, shape (m, 1) or (m,)

    Returns:
    float: RMSE value
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)

    # Without this, (m,) against (m, 1) broadcasts to (m, m) and the metric
    # is computed over all pairs instead of matching samples.
    if y.shape != y_pred.shape and y.size == y_pred.size:
        y_pred = y_pred.reshape(y.shape)

    mse = np.mean((y - y_pred) ** 2)
    return np.sqrt(mse)


In [0]:
def r2_score(y, y_pred):
    """
    Compute R-squared (R²) score.

    Parameters:
    y (array-like): Actual values, shape (m, 1) or (m,)
    y_pred (array-like): Predicted values, shape (m, 1) or (m,)

    Returns:
    float: R² score. When the actual values are constant (zero total
        variance) the ratio is undefined; 1.0 is returned for a perfect
        prediction and 0.0 otherwise instead of nan / -inf.
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)

    # Without this, (m,) against (m, 1) broadcasts to (m, m) and the
    # residual sum is taken over all pairs instead of matching samples.
    if y.shape != y_pred.shape and y.size == y_pred.size:
        y_pred = y_pred.reshape(y.shape)

    ss_res = np.sum((y - y_pred) ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)

    # Constant target: avoid dividing by zero
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0

    return 1 - (ss_res / ss_tot)


In [0]:
def evaluate_model(y, y_pred):
    """
    Summarize regression quality for a set of predictions.

    Parameters:
    y: Actual target values
    y_pred: Predicted values

    Returns:
    dict: {"RMSE": root mean squared error, "R2": R-squared score}
    """
    rmse_value = root_mean_squared_error(y, y_pred)
    r2_value = r2_score(y, y_pred)

    return {"RMSE": rmse_value, "R2": r2_value}


In [0]:
def compute_gradients(X, y, y_pred):
    """
    Compute gradients of the MSE loss with respect to
    weights and bias for Linear Regression.

    Parameters:
    X (numpy.ndarray): Feature matrix of shape (m, n_features)
    y (array-like): Actual target values of shape (m, 1) or (m,)
    y_pred (numpy.ndarray): Predicted values of shape (m, 1)

    Returns:
    dW (numpy.ndarray): Gradient w.r.t weights, shape (n_features, 1)
    db (float): Gradient w.r.t bias
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)

    # A 1-D y against a column y_pred would broadcast to an (m, m) error
    # matrix and yield wrongly shaped gradients; match y to the predictions.
    if y.shape != y_pred.shape and y.size == y_pred.size:
        y = y.reshape(y_pred.shape)

    # Number of training examples
    m = X.shape[0]

    # Error term (y_pred - y)
    error = y_pred - y

    # d(MSE)/dW = (2/m) * X^T (y_pred - y)
    dW = (2 / m) * np.dot(X.T, error)

    # d(MSE)/db = (2/m) * sum(y_pred - y)
    db = (2 / m) * np.sum(error)

    return dW, db


In [0]:
def update_parameters(W, b, dW, db, learning_rate):
    """
    Take one gradient descent step on the weights and the bias.

    Parameters:
    W (numpy.ndarray): Current weights, shape (n_features, 1)
    b (float): Current bias
    dW (numpy.ndarray): Gradient w.r.t weights, shape (n_features, 1)
    db (float): Gradient w.r.t bias
    learning_rate (float): Step size for gradient descent

    Returns:
    W (numpy.ndarray): Updated weights
    b (float): Updated bias
    """
    # Scale each gradient by the step size, then move against it.
    # The subtraction builds new values; the arguments are not modified.
    weight_step = learning_rate * dW
    bias_step = learning_rate * db

    return W - weight_step, b - bias_step


In [0]:
def gradient_descent(X, y, learning_rate, epochs, log_every=50):
    """
    Train Linear Regression model using Gradient Descent.

    Parameters:
    X (numpy.ndarray): Feature matrix of shape (m, n_features)
    y (numpy.ndarray): Target values of shape (m, 1)
    learning_rate (float): Learning rate for gradient descent
    epochs (int): Number of training iterations
    log_every (int): Record and print the loss every `log_every` epochs,
        starting with epoch 0 (default 50)

    Returns:
    W (numpy.ndarray): Trained weights of shape (n_features, 1)
    b (float): Trained bias
    loss_history (list): MSE loss sampled at epochs 0, log_every,
        2 * log_every, ... (measured before that epoch's update)

    Raises:
    ValueError: If log_every is smaller than 1
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    # Initialize parameters
    n_features = X.shape[1]
    W, b = initialize_parameters(n_features)

    loss_history = []

    # Training loop
    for epoch in range(epochs):

        # Step 1: Prediction
        y_pred = predict(W, b, X)

        # Step 2: Compute and record the loss.
        # The loss is only used for reporting, so it is skipped on the
        # epochs that are not logged.
        if epoch % log_every == 0:
            loss = mean_squared_error(y, y_pred)
            loss_history.append(loss)
            print("loss after {} epochs is: {}".format(epoch, loss))

        # Step 3: Compute gradients
        dW, db = compute_gradients(X, y, y_pred)

        # Step 4: Update parameters
        W, b = update_parameters(W, b, dW, db, learning_rate)

    return W, b, loss_history


In [0]:
def linear_regression_train(X, y, learning_rate=0.01, epochs=1000):
    """
    Train Linear Regression model.
    Accepts Pandas DataFrame / Series, NumPy arrays or plain sequences.

    Parameters:
    X: Features of shape (m, n_features); a 1-D input is treated as a
        single feature column of shape (m, 1)
    y: Targets of shape (m,) or (m, 1)
    learning_rate (float): Step size for gradient descent
    epochs (int): Number of training iterations

    Returns:
    dict: {"weights": trained W, "bias": trained b,
           "loss_history": losses recorded by gradient_descent}

    Raises:
    ValueError: If X and y do not have the same number of rows
    """
    # Normalise every supported input type to NumPy arrays
    X = np.asarray(X)
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    # Always use a column vector for y: a 1-D y would broadcast against the
    # (m, 1) predictions into an (m, m) error matrix during training.
    y = np.asarray(y).reshape(-1, 1)

    # Validate shapes
    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y must have same number of rows")

    W, b, loss_history = gradient_descent(X, y, learning_rate, epochs)

    return {
        "weights": W,
        "bias": b,
        "loss_history": loss_history
    }


In [0]:
def linear_regression_predict(X, model):
    """
    Predict targets for X with a trained model.

    Parameters:
    X: Pandas DataFrame or NumPy array of features
    model (dict): Mapping with "weights" and "bias" entries

    Returns:
    The result of predict() for the model's weights and bias
    """
    # Objects exposing .values (e.g. DataFrames) are unwrapped to that
    # attribute; anything else is passed through untouched.
    features = getattr(X, "values", X)

    weights = model["weights"]
    bias = model["bias"]
    return predict(weights, bias, features)


In [0]:
# Toy dataset: 6 samples (m=6) with 2 features each (n=2)
feature_columns = {
    "feature_1": [1, 2, 3, 4, 5, 6],
    "feature_2": [2, 1, 4, 3, 6, 5],
}
X_df = pd.DataFrame(feature_columns)

# Targets follow 2*x1 + 3*x2 + 5 exactly
target_values = [13, 12, 23, 22, 33, 32]
y_df = pd.Series(target_values, name="target")

print(X_df)
print(y_df)

In [0]:
# Fit on the toy dataset with a 0.01 learning rate for 2000 epochs
model = linear_regression_train(X_df, y_df, learning_rate=0.01, epochs=2000)


In [0]:
# Show the fitted parameters, each under its own heading line
print("Learned Weights:", model["weights"], sep="\n")
print("\nLearned Bias:", model["bias"], sep="\n")


In [0]:
# Predict on the training features and line the results up with the targets
y_pred = linear_regression_predict(X_df, model)

comparison_df = X_df.assign(
    Actual_y=y_df.values,
    Predicted_y=y_pred.flatten(),  # (m, 1) predictions -> 1-D column
)

print(comparison_df)


In [0]:
# Score the predictions against the targets reshaped to a column vector
actual_column = y_df.values.reshape(-1, 1)
metrics = evaluate_model(actual_column, y_pred)
print("Evaluation Metrics:", metrics)


In [0]:
# Two unseen samples with the same feature columns as the training data
new_samples = {"feature_1": [7, 8], "feature_2": [8, 7]}
X_new = pd.DataFrame(new_samples)

y_new_pred = linear_regression_predict(X_new, model)
print("Predictions on new data:", y_new_pred, sep="\n")
