In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
import numpy as np

class LinearRegressionCustom:
    """Ordinary least-squares linear regression with an intercept.

    The feature matrix is augmented with a trailing column of ones so the
    intercept is estimated together with the feature coefficients.

    The following attributes are ``None`` until ``fit`` has been called:

    * ``coef_``      -- 1-D array with one coefficient per feature column.
    * ``intercept_`` -- scalar bias term.
    * ``weights``    -- column vector of shape (n_features + 1, 1): the
      coefficients followed by the intercept.
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None
        self.weights = None

    @staticmethod
    def _augment(X):
        """Return X as a 2-D float array with a trailing column of ones."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            # A flat sequence is treated as a single feature column.
            X = X.reshape(-1, 1)
        ones = np.ones((X.shape[0], 1))
        return np.hstack([X, ones])

    def fit(self, X, y):
        """Estimate the coefficients and intercept by least squares.

        Args:
            X: array-like of shape (n_samples, n_features), or a 1-D
                array-like for a single feature.
            y: array-like with n_samples target values.

        Returns:
            self, so that construction and fitting can be chained.

        Raises:
            ValueError: if X and y do not have the same number of samples.
        """
        X_b = self._augment(X)
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if X_b.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X_b.shape[0]} samples but y has {y.shape[0]}"
            )

        # Solve min ||X_b w - y|| directly rather than forming and inverting
        # XᵀX: inverting fails (or is wildly inaccurate) when features are
        # collinear, whereas lstsq returns the minimum-norm solution.
        self.weights, *_ = np.linalg.lstsq(X_b, y, rcond=None)

        self.coef_ = self.weights[:-1].flatten()     # Coefficients
        self.intercept_ = self.weights[-1][0]        # Intercept
        return self

    def predict(self, X):
        """Predict targets for X.

        Args:
            X: array-like of shape (n_samples, n_features), or a 1-D
                array-like for a single feature.

        Returns:
            Array of shape (n_samples, 1) with the predicted values.

        Raises:
            ValueError: if the model has not been fitted yet.
        """
        if self.weights is None:
            raise ValueError("Model is not fitted yet; call fit() before predict().")
        return self._augment(X) @ self.weights


**Result on Dataset 1: _Boston Housing Dataset_**

In [3]:
data = pd.read_csv('BostonHousing.csv')
# Drop the leading column only when it is a leftover row index: pandas labels a
# header-less first column "Unnamed: 0". Slicing off column 0 unconditionally
# would throw away a real feature if the CSV has no index column.
# NOTE(review): the CSV layout is not visible here -- confirm its columns.
if data.shape[1] > 0 and str(data.columns[0]).startswith('Unnamed'):
    data = data.iloc[:, 1:]
X = data.iloc[:, :-1]  # Features: every column except the last
Y = data.iloc[:, -1]   # Target: the last column
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [4]:
# Custom model: fit on the training split, score on the held-out split

model = LinearRegressionCustom()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Test-set error and goodness of fit
mse, r2 = mean_squared_error(Y_test, Y_pred), r2_score(Y_test, Y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R2 Score: {r2}")

Mean Squared Error: 24.348533587769186
R2 Score: 0.6679765786180947


In [5]:
# Results using scikit-learn's LinearRegression

# fit() returns the estimator itself, so construction and training are chained
model = LinearRegression().fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Test-set error and goodness of fit, reported to four decimal places
mse, r2 = mean_squared_error(Y_test, Y_pred), r2_score(Y_test, Y_pred)
print(f"Mean Squared Error: {mse:.4f}")
print(f"R2 Score: {r2:.4f}")

Mean Squared Error: 24.3485
R2 Score: 0.6680


**Result on Dataset 2: _Advertising Dataset_**

In [6]:
data = pd.read_csv('advertising.csv')
# Drop the leading column only when it is a leftover row index: pandas labels a
# header-less first column "Unnamed: 0". Slicing off column 0 unconditionally
# would throw away a real feature if the CSV has no index column.
# NOTE(review): the CSV layout is not visible here -- confirm its columns.
if data.shape[1] > 0 and str(data.columns[0]).startswith('Unnamed'):
    data = data.iloc[:, 1:]
X = data.iloc[:, :-1]  # Features: every column except the last
Y = data.iloc[:, -1]   # Target: the last column
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [7]:
# Custom model: fit on the training split, score on the held-out split

model = LinearRegressionCustom()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Test-set error and goodness of fit
mse, r2 = mean_squared_error(Y_test, Y_pred), r2_score(Y_test, Y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R2 Score: {r2}")

Mean Squared Error: 27.50063184992545
R2 Score: 0.11004359553784637


In [8]:
# Results using scikit-learn's LinearRegression

# fit() returns the estimator itself, so construction and training are chained
model = LinearRegression().fit(X_train, Y_train)
Y_pred = model.predict(X_test)

# Test-set error and goodness of fit, reported to four decimal places
mse, r2 = mean_squared_error(Y_test, Y_pred), r2_score(Y_test, Y_pred)
print(f"Mean Squared Error: {mse:.4f}")
print(f"R2 Score: {r2:.4f}")

Mean Squared Error: 27.5006
R2 Score: 0.1100
