# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression # Using SK's LinearRegression class
from sklearn.metrics import mean_squared_error, root_mean_squared_error # To estimate MSE and RMSE of both models

# Implement a custom LinearRegression class

In [2]:
class CustomLinearRegression: # Custom Linear Regression Class (Implementing a close version to SK's own LinearRegression class)
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def fit(self): # Using OLS via RSS
        y_bar = self.y.mean()
        x_bar = pd.DataFrame(self.x.mean())
        self.w = pd.Series(np.linalg.inv(self.x.T.dot(self.x).to_numpy()).dot(self.x.T.dot(self.y))).values
        e = y_bar - np.dot(self.w, x_bar)
        self.b = np.sqrt((e.T.dot(e))/(self.x.shape[0]-self.x.shape[1]))

    def predict(self, x): # X.W + B
        return np.add(np.dot(x, self.w), self.b)

    def fit_predict(self, x): # fit the model and predict new input in the same function
        self.fit()
        return self.predict(x)

# Initialize the Dataset

In [3]:
train = pd.read_csv("train.csv").dropna() # Train Dataset
test = pd.read_csv("test.csv") # Test Dataset
x_train, y_train = train.drop("y", axis=1), train.y
x_test, y_test = test.drop("y", axis=1), test.y

# Fit and test the model using the Custom LinearRegression class

In [4]:
cust_lin_reg = CustomLinearRegression(x_train, y_train)
y_pred = cust_lin_reg.fit_predict(x_test)
rss = y_test.subtract(y_pred).pow(2).sum()
mse = mean_squared_error(y_test, y_pred)
print(f"W (Coeffecients/Weights) = {cust_lin_reg.w}\nB (Intercept/Bias) = {cust_lin_reg.b}",
      f"RSS = {rss}\nMSE = {mean_squared_error(y_test, y_pred)}\nRMSE = {root_mean_squared_error(y_test, y_pred)}",
      sep='\n')

W (Coeffecients/Weights) = [0.99904948]
B (Intercept/Bias) = 0.0010180761646884937
RSS = 2835.9658980020777
MSE = 9.453219660006926
RMSE = 3.074608862929873


# Fit and test the model using SciKit-Learn's LinearRegression class

In [5]:
lin_reg = LinearRegression()
lin_reg.fit(x_train, y_train)
y_pred_2 = lin_reg.predict(x_test)
rss = y_test.subtract(y_pred_2).pow(2).sum()
mse = mean_squared_error(y_test, y_pred_2)
print(f"W (Coeffecients/Weights) = {lin_reg.coef_}\nB (Intercept/Bias) = {lin_reg.intercept_}",
      f"RSS = {rss}\nMSE = {mean_squared_error(y_test, y_pred_2)}\nRMSE = {root_mean_squared_error(y_test, y_pred_2)}",
      sep='\n')

W (Coeffecients/Weights) = [1.00065638]
B (Intercept/Bias) = -0.10726546430097272
RSS = 2829.8766576117914
MSE = 9.432922192039305
RMSE = 3.071306268029827
