Imports

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [6]:
X_1d = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
y_1d = np.array([6.2, 8.5, 9.1, 11.2, 12.8, 14.5, 15.1, 17.4, 18.2, 20.5, 21.1, 23.4, 24.8, 26.1, 27.5])

In [7]:
X_2d = np.array([
    [1500, 5], 
    [2000, 10], 
    [1200, 2], 
    [2400, 15], 
    [1800, 8],
    [3000, 20],
    [2200, 12],
    [1100, 1],
    [2700, 18],
    [1600, 6]
])
y_2d = np.array([300, 350, 280, 410, 330, 480, 370, 260, 440, 310])

In [8]:
class LinearRegression_OLS:
    def __init__(self):
        self.slope = None
        self.bias = None

    def fit(self, X, y):
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        
        self.slope = np.dot(np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, y))
        self.bias = y.mean() - np.dot(X.mean(axis=0), self.slope)
    
    def predict(self, X):
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return np.dot(X, self.slope) + self.bias

    def calculate_mse(self, y_true, y_pred):
        return np.mean((y_true - y_pred)**2)


In [9]:
model1 = LinearRegression_OLS()
model1.fit(X_1d, y_1d)

predictions = model1.predict(X_1d)
error = model1.calculate_mse(y_1d, predictions)

print(f"MSE: {error}")

MSE: 4.554947022777195


In [10]:
model2 = LinearRegression_OLS()
model2.fit(X_2d, y_2d)

predictions = model2.predict(X_2d)
error = model2.calculate_mse(y_2d, predictions)

print(f"MSE: {error}")

MSE: 89.5222624189231


In [14]:
class LinearRegression_GD:
    def __init__(self):
        self.slope = None
        self.bias = 1
        self.l_rate = 0.01
        self.X_min = None
        self.X_max = None
        
    def fit(self, X, y, iter):
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        
        self.slope = np.zeros(X.shape[1])
        n = len(X)
        
        for _ in range(iter):
            y_pred = np.dot(X, self.slope) + self.bias
            error = y_pred - y
            
            Dm = (2/n) * np.dot(X.T, error)
            Dc = (2/n) * np.sum(error)
            
            self.slope -= self.l_rate * Dm
            self.bias -= self.l_rate * Dc
            
    def predict(self, X):
        X = np.array(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
            
        return np.dot(X, self.slope) + self.bias

    def calculate_mse(self, y_true, y_pred):
        y_true = np.array(y_true).flatten()
        y_pred = np.array(y_pred).flatten()
        return np.mean((y_true - y_pred)**2)

In [None]:
#Scaling
X_mean, X_std = X_1d.mean(), X_1d.std()
y_mean, y_std = y_1d.mean(), y_1d.std()
X_scaled = (X_1d - X_mean) / X_std
y_scaled = (y_1d - y_mean) / y_std

#Initialization
m_1d = LinearRegression_GD()
m_1d.l_rate = 0.1
m_1d.fit(X_scaled, y_scaled, iter=2000)

#Prediction
res_scaled = m_1d.predict(X_scaled)
res_final = (res_scaled * y_std) + y_mean

mse = m_1d.calculate_mse(y_1d, res_final)

print(f"Final MSE (Original Units): {mse}")

Final MSE (Original Units): 0.0028456634466183504


In [17]:
X_mean = X_2d.mean(axis=0)
X_std = X_2d.std(axis=0)
y_mean = y_2d.mean()
y_std = y_2d.std()

X_scaled = (X_2d - X_mean) / X_std
y_scaled = (y_2d - y_mean) / y_std

m_2d = LinearRegression_GD()
m_2d.l_rate = 0.1
m_2d.fit(X_scaled, y_scaled, iter=1000)

res_scaled = m_2d.predict(X_scaled)
res_final = (res_scaled * y_std) + y_mean
mse = m_2d.calculate_mse(y_2d, res_final)

print(f"New 2D MSE: {mse}")
print(f"Slopes: {m_2d.slope}")
print(f"Bias: {m_2d.bias}")

New 2D MSE: 46.3024319799647
Slopes: [0.47152165 0.52386036]
Bias: -5.688323547189514e-17
