In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
try:
    from sklearn.datasets import load_boston
except ImportError:
    # `load_boston` was removed in scikit-learn 1.2; keep the rest of this
    # import cell usable there instead of aborting on the first failure.
    load_boston = None
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the Boston Housing dataset.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2. When the
# loader is unavailable, rebuild the same `data` / `target` arrays from the
# original StatLib file (requires network access).
if load_boston is not None:
    boston = load_boston()
else:
    from sklearn.utils import Bunch

    # After a 22-line header, every record is split across two physical lines:
    # the even rows hold the first 11 features, the odd rows hold the last two
    # features followed by the target.
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
    )

In [3]:
# Pull the features (X) and the target variable (y) out of the loaded dataset
X, y = boston.data, boston.target

In [4]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Standardize features: fit the scaler on the training split only,
# then apply that same fitted transform to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Add intercept term to X_train.
# The matrix is rebuilt from `scaler` and `X_train` instead of from the current
# value of `X_train_scaled`, so re-running this cell always yields exactly one
# leading column of ones rather than stacking another one each time.
intercept = np.ones((X_train.shape[0], 1))
X_train_scaled = np.concatenate((intercept, scaler.transform(X_train)), axis=1)

In [7]:
def compute_mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values. Must have the same shape as ``y_true``; a scalar is
        also accepted and is compared against every element.

    Returns
    -------
    numpy.float64
        Mean of the squared element-wise differences.

    Raises
    ------
    ValueError
        If both inputs are non-scalar and their shapes differ.
    """
    # Accept lists / tuples as well as arrays.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Without this guard, (n,) against (n, 1) broadcasts to an (n, n) matrix
    # and the function returns a plausible-looking but meaningless number.
    if y_true.ndim > 0 and y_pred.ndim > 0 and y_true.shape != y_pred.shape:
        raise ValueError(
            f"Shape mismatch: y_true has shape {y_true.shape}, "
            f"y_pred has shape {y_pred.shape}"
        )

    return np.mean((y_true - y_pred) ** 2)

In [8]:
def gradient_descent(X, y, learning_rate, num_iterations, verbose=True, log_every=100):
    """Fit multiple-linear-regression coefficients with batch gradient descent.

    Starting from an all-zero parameter vector, each iteration moves the
    parameters against the gradient ``X.T @ (X @ theta - y) / num_samples``.

    Parameters
    ----------
    X : array-like, 2-D
        Design matrix with one row per sample. No intercept column is added
        here; include one in ``X`` if an intercept is wanted.
    y : array-like
        Target values, one per row of ``X``. A column vector is flattened.
    learning_rate : float
        Step size applied to the gradient.
    num_iterations : int
        Number of update steps to perform.
    verbose : bool, default True
        When true, print the training MSE every ``log_every`` iterations.
    log_every : int, default 100
        Logging period in iterations (only used when ``verbose`` is true).

    Returns
    -------
    numpy.ndarray
        Parameter vector with one entry per column of ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, has no rows, disagrees with ``y`` on the number
        of samples, or if ``log_every`` is below 1 while logging is enabled.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so an (n, 1) target cannot broadcast against (n,) predictions.
    y = np.asarray(y, dtype=float).ravel()

    if X.ndim != 2:
        raise ValueError(f"X must be 2-D, got {X.ndim} dimension(s)")
    num_samples, num_features = X.shape
    if num_samples == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != num_samples:
        raise ValueError(
            f"X has {num_samples} samples but y has {y.shape[0]} values"
        )
    if verbose and log_every < 1:
        raise ValueError("log_every must be a positive integer")

    theta = np.zeros(num_features)  # Initialize parameters with zeros

    for i in range(num_iterations):
        # Residuals of the current fit
        error = np.dot(X, theta) - y

        # Averaged gradient of the squared-error loss
        gradients = (1 / num_samples) * np.dot(X.T, error)

        # Gradient step
        theta -= learning_rate * gradients

        # The logged MSE is measured after this iteration's update
        if verbose and i % log_every == 0:
            mse = compute_mse(y, np.dot(X, theta))
            print(f"Iteration {i}, MSE: {mse}")

    return theta

In [9]:
# Fit the parameters on the training design matrix
learning_rate, num_iterations = 0.01, 1000
theta = gradient_descent(
    X_train_scaled,
    y_train,
    learning_rate=learning_rate,
    num_iterations=num_iterations,
)

Iteration 0, MSE: 591.800632334655
Iteration 100, MSE: 94.72013035974838
Iteration 200, MSE: 32.59418905610845
Iteration 300, MSE: 24.003089771852146
Iteration 400, MSE: 22.63492554426304
Iteration 500, MSE: 22.298867468455693
Iteration 600, MSE: 22.1431819037471
Iteration 700, MSE: 22.03998702641069
Iteration 800, MSE: 21.963689447904155
Iteration 900, MSE: 21.905304156403805


In [10]:
# Add intercept term to X_test.
# The matrix is rebuilt from `scaler` and `X_test` instead of from the current
# value of `X_test_scaled`, so re-running this cell always yields exactly one
# leading column of ones rather than stacking another one each time.
intercept = np.ones((X_test.shape[0], 1))
X_test_scaled = np.concatenate((intercept, scaler.transform(X_test)), axis=1)

In [11]:
# Apply the fitted parameters to the test design matrix
predictions = X_test_scaled.dot(theta)

In [12]:
# Score the test-set predictions with mean squared error
mse_test = compute_mse(y_true=y_test, y_pred=predictions)
print(f"Test MSE: {mse_test}")

Test MSE: 25.34969307651813
