# Linear Regression with Analytic solution

In this notebook, we will find the analytical solution for Linear Regression, with nonlinear basis functions.

In [1]:
import numpy as np
from dataset import X_train, X_test, y_train, y_test

## Input augmentation

Prepend a column of ones to `X_train` and `X_test` so that the bias (intercept) term is learned as part of the weight vector.

In [3]:
# Prepend a ones column (shape: n_samples x 1) to each feature matrix
train_ones = np.ones((X_train.shape[0], 1))
test_ones = np.ones((X_test.shape[0], 1))
X_train = np.concatenate([train_ones, X_train], axis=1)
X_test = np.concatenate([test_ones, X_test], axis=1)

## Find analytical solution

In [8]:
def find_w_analytical(X, y):
    """
    Find the analytical (least-squares) solution of linear regression
    :param X: the augmented training features, a 2-D array of shape
              (n_samples, n_features)
    :param y: the training labels, with n_samples entries along axis 0
    :return: the optimal weight w minimising ||X w - y||^2
    """
    # The minimiser of ||X w - y||^2 satisfies the normal equations
    # (X.T @ X) w = X.T @ y. Solving the least-squares problem on X directly
    # avoids forming X.T @ X (which squares the condition number) and still
    # returns a solution (the minimum-norm one) when the columns of X are
    # linearly dependent, where a direct solve of the normal equations
    # would raise LinAlgError.
    w, _residuals, _rank, _singular_values = np.linalg.lstsq(X, y, rcond=None)
    return w

def make_prediction(X, w):
    """
    Compute the linear model's output for a set of inputs
    :param X: the augmented features to predict on
    :param w: the weight vector to apply
    :return: the matrix product of X and w, i.e. the predicted labels
    """
    predictions = X @ w
    return predictions

In [10]:
# Fit the weights on the training split
w_optimal = find_w_analytical(X_train, y_train)

# Apply the fitted weights to both splits
y_train_predicted = make_prediction(X_train, w_optimal)
y_test_predicted = make_prediction(X_test, w_optimal)

# Residuals (label minus prediction) for each split
train_residual = y_train - y_train_predicted
test_residual = y_test - y_test_predicted

# Sum of squared residuals scaled by 1 / (2 * n_samples); note the extra
# factor of 1/2 relative to a plain mean of squared errors
mse_train = (1 / (2 * X_train.shape[0])) * np.dot(train_residual, train_residual)
mse_test = (1 / (2 * X_test.shape[0])) * np.dot(test_residual, test_residual)

# Report the error on each split
print(f"MSE for training set: {mse_train}")
print(f"MSE for test set: {mse_test}")

MSE for training set: 0.0021246980206582333
MSE for test set: 0.0017458727452898124
