In [81]:
"""
Adam Nurlign 7/1/2025

Hello there! In this notebook I will be implementing various Linear Regression Machine Learning
Models for predicting continuous values from continuous input features. There are many
modules in Python such as PyTorch and Scikit-learn that give you access to linear regression models
but I thought it would be a good exercise to be able to implement this from scratch without looking
at my notes. I hope you enjoy!

There are two algorithms I implement for optimising the parameters of my Linear Regression model: Closed-form solution and
Gradient Descent.

Clarification: To be more specific I will be implementing Linear Regression with polynomial
feature engineering which will in effect allow us to fit any degree polynomial to the training
dataset.

"""

'\nAdam Nurlign 7/1/2025\n\nHello there! In this notebook I will be implementing various Linear Regression Machine Learning\nModels for predicting continous values from continous input features. There are many\nmodules in Python such as PyTorch and Scikit-learn that give you access to linear regression models\nbut I thought it would be a good exercise to be able to implement this from scratch without looking\nat my notes. I hope you enjoy!\n\nThere are two algorithms I implement for optimising the parameters of my Linear Regression model: Closed-form solution and \nGradient Descent.\n\nClarification: To be more specific I will be implementing Linear Regression with polynomial\nfeature engineering which will in effect allow us to fit any degree polynomial to the training\ndataset.\n\n'

In [82]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [83]:
# Seed the global NumPy RNG so the shuffle below -- and therefore the train/test
# split and every metric computed from it -- is reproducible after a kernel restart.
# Any later code drawing from the global NumPy RNG becomes deterministic as well.
RANDOM_SEED=42
np.random.seed(RANDOM_SEED)

# Load the CSV, skipping its header row. Per the slicing below, the last column is
# used as the regression target and every other column as an input feature.
data=np.loadtxt("sample_data/concrete.csv",delimiter=",",skiprows=1)
# Shuffle the rows in place so the split is not tied to the file's row order.
np.random.shuffle(data)
# The first 80% of the shuffled rows are the training set; the rest are held out.
splitIndex=int(0.8*len(data))
ConcreteStrengthX=data[:splitIndex,:-1]
# reshape(-1,1) turns the target into a column vector.
ConcreteStrengthY=data[:splitIndex,-1].reshape(-1,1)
print(ConcreteStrengthX.shape)
print(ConcreteStrengthY.shape)
ConcreteStrengthXTest=data[splitIndex:,:-1]
ConcreteStrengthYTest=data[splitIndex:,-1].reshape(-1,1)


(824, 8)
(824, 1)


In [84]:

def standardize_data(data):
    """Standardize ``data`` along axis 0 to zero mean and unit standard deviation.

    Args:
        data: NumPy array whose first axis indexes samples; statistics are
            computed per column (``axis=0``).

    Returns:
        A tuple ``(standardized, mean, std)``. ``mean`` and ``std`` are the
        statistics that were applied, so the same transform can be reused on
        other data. Entries of ``std`` that are exactly zero (constant
        columns) are returned as 1.0.
    """
    mean=np.mean(data,axis=0)
    std=np.std(data,axis=0)
    # A constant column has std == 0 and would turn into NaN/inf when divided.
    # Dividing by 1 instead leaves such a column at exactly 0 after centering.
    std=np.where(std==0,1.0,std)
    return (data-mean)/ std, mean, std

# Fit the scaling statistics on the training features and keep them for reuse.
ConcreteStrengthX, train_mean, train_std = standardize_data(ConcreteStrengthX)

# Scale the held-out features with the training statistics rather than their own,
# so both splits go through exactly the same transform.
ConcreteStrengthXTest = np.divide(np.subtract(ConcreteStrengthXTest, train_mean), train_std)


In [85]:
def makeDesignMatrix(xRaw,degree):
  """Build a polynomial design matrix (no cross terms) from raw features.

  The columns are, in order: a column of ones (bias term), ``xRaw`` itself,
  then ``xRaw**2``, ..., ``xRaw**degree``, each power taken element-wise.

  Args:
    xRaw: 2-D array of shape (n_samples, n_features).
    degree: Highest power to include. Values below 2 add no power columns,
      so the result is just the bias column followed by ``xRaw``.

  Returns:
    A new array with ``n_samples`` rows; ``xRaw`` is not modified.
  """
  columnGroups=[np.ones((xRaw.shape[0],1)),xRaw]
  for power in range(2,degree+1):
    # The exponent must be the loop variable. Raising to `degree` on every pass
    # would append duplicate xRaw**degree columns and skip the intermediate powers.
    columnGroups.append(xRaw**power)
  return np.concatenate(columnGroups,axis=1)

In [86]:
def MSE(y,yHat):
  """Mean squared error: sum of squared element-wise differences divided by y.shape[0]."""
  sampleCount=y.shape[0]
  residual=y-yHat
  return np.square(residual).ravel().sum()/sampleCount

In [87]:
def MAE(y,yHat):
  """Mean absolute error: sum of absolute element-wise differences divided by y.shape[0]."""
  sampleCount=y.shape[0]
  residual=y-yHat
  return np.abs(residual).sum()/sampleCount



In [88]:
def computeBatches(X,y,batch_size):
  """Split X and y row-wise into consecutive (XBatch, yBatch) pairs.

  Every batch holds ``batch_size`` rows except possibly the last one, which
  carries whatever rows remain when the row count is not an exact multiple.
  Both inputs are sliced with two indices, so they must be 2-D.
  """
  fullBatchCount,leftover=divmod(X.shape[0],batch_size)
  batches=[
      (X[k*batch_size:(k+1)*batch_size,:],y[k*batch_size:(k+1)*batch_size,:])
      for k in range(fullBatchCount)
  ]
  if leftover!=0:
    # Trailing partial batch with the remaining rows.
    tailStart=fullBatchCount*batch_size
    batches.append((X[tailStart:,:],y[tailStart:,:]))
  return batches


In [89]:
def computeGradient(X,y,w):
  """Return (2/n) * (X^T X w - X^T y), with n = X.shape[0].

  This is the gradient with respect to w of the mean squared error
  (1/n) * ||y - X w||^2, evaluated on whichever rows (batch) are passed in.
  """
  sampleCount=X.shape[0]
  XT=X.T
  # Same association order as written: (-X^T) y is added to (X^T X) w.
  return (2/sampleCount)*((-1*XT)@y+XT@X@w)

In [90]:
def shuffleDataset(X, y):
  """Return copies of X and y with their rows permuted by one shared random order.

  A single index permutation is drawn from the global NumPy RNG and applied
  to both arrays, so row i of the shuffled X still corresponds to row i of
  the shuffled y. The inputs themselves are not modified.

  Args:
    X: Array indexed by sample along its first axis.
    y: Array with at least as many entries along its first axis as X.

  Returns:
    Tuple ``(XShuffled, yShuffled)``.
  """
  idxs = np.arange(X.shape[0])
  np.random.shuffle(idxs)
  # Index each array exactly once. Indexing only the first axis also lets this
  # work for arrays that are not 2-D.
  return X[idxs], y[idxs]

In [91]:
class LinearRegModel():
  """Linear regression on polynomial features built by ``makeDesignMatrix``.

  The model can be fitted either in closed form (least squares) or with
  mini-batch gradient descent. After fitting, ``params`` holds the weight
  vector and ``degree`` the polynomial degree used to build the features.
  """

  def __init__(self):
    # Weight vector; None until one of the fit_* methods has run.
    self.params=None
    # Polynomial degree used for the design matrix; set by the fit_* methods.
    self.degree=None
    # Learning rate; only set by fit_gradient_descent.
    self.lr=None

  def fit_closed_form(self,xRaw,trainY,degree):
    """Fit the weights as the least-squares solution of X w = trainY.

    Args:
      xRaw: Raw feature matrix passed to ``makeDesignMatrix``.
      trainY: Training targets, one row per sample.
      degree: Polynomial degree for the design matrix.
    """
    self.degree=degree
    X=makeDesignMatrix(xRaw,self.degree)
    # Solve the least-squares problem directly rather than forming and inverting
    # X^T X. For a full-rank X this is the same solution as (X^T X)^-1 X^T y, but
    # it is better conditioned and still returns a (minimum-norm) answer when
    # X^T X is singular instead of failing.
    self.params=np.linalg.lstsq(X,trainY,rcond=None)[0]


  def fit_gradient_descent(self,xRaw,trainY,degree,num_epochs,batch_size,lr):
    """Fit the weights with mini-batch gradient descent on the MSE loss.

    batch_size=1 corresponds to stochastic gradient descent and a batch_size
    equal to the number of samples to full-batch gradient descent.

    Args:
      xRaw: Raw feature matrix passed to ``makeDesignMatrix``.
      trainY: Training targets, one row per sample.
      degree: Polynomial degree for the design matrix.
      num_epochs: Number of passes over the (reshuffled) training data.
      batch_size: Number of rows per gradient step.
      lr: Learning rate (step size).
    """
    self.degree=degree
    self.lr=lr
    X=makeDesignMatrix(xRaw,self.degree)
    # Start from an all-ones weight vector, one weight per design-matrix column.
    self.params=np.ones((X.shape[1],1))
    for i in range(num_epochs):
      # Reshuffle every epoch so the batches differ between passes.
      shuffle_X,shuffle_y=shuffleDataset(X,trainY)
      batchesList=computeBatches(shuffle_X,shuffle_y,batch_size)
      for (XBatch,yBatch) in batchesList:
        gradient=computeGradient(XBatch,yBatch,self.params)
        self.params=self.params-self.lr*gradient



  def predict(self,xRaw):
    """Return predictions X @ params for raw features, using the fitted degree."""
    X=makeDesignMatrix(xRaw,self.degree)
    return X@self.params

  #mean squared error loss function
  def computeMSE(self,xTestRaw,yTest):
    """Return the mean squared error of the model's predictions against yTest."""
    yHat=self.predict(xTestRaw)
    return MSE(yTest,yHat)

  #mean absolute error loss function
  def computeMAE(self,xTestRaw,yTest):
    """Return the mean absolute error of the model's predictions against yTest."""
    yHat=self.predict(xTestRaw)
    return MAE(yTest,yHat)

In [92]:
# Fit the same degree-2 model twice: once in closed form and once with
# mini-batch gradient descent (500 epochs, batches of 100 rows, learning rate 0.02).
testModel1=LinearRegModel()
testModel1.fit_closed_form(ConcreteStrengthX,ConcreteStrengthY,degree=2)

testModel2=LinearRegModel()
testModel2.fit_gradient_descent(
    ConcreteStrengthX,ConcreteStrengthY,degree=2,num_epochs=500,batch_size=100,lr=0.02)

# Show both weight vectors and their element-wise difference.
print(
    "Here are the parameters for the Linear Regression Model that was trained using the closed-form solution:",
    testModel1.params,
    "----------------------",
    sep="\n")
print(
    "Here are the parameters for the Linear Regression Model that was trained using the gradient-descent algorithm:",
    testModel2.params,
    "----------------------",
    sep="\n")
print(
    "Here are the differences in the model's paramters: gradientDescentModel's parameters- closed-form solution model's parameters:",
    testModel2.params-testModel1.params,
    sep="\n")


Here are the parameters for the Linear Regression Model that was trained using the closed-form solution:
[[42.36735528]
 [12.47601188]
 [ 9.34543882]
 [ 4.11217839]
 [-3.43416244]
 [ 2.71519318]
 [ 1.03204326]
 [ 1.05825802]
 [17.61631408]
 [-0.16640916]
 [-0.97403573]
 [-0.97743201]
 [ 0.7799715 ]
 [-1.51583976]
 [ 0.32740372]
 [-1.06594398]
 [-3.26664753]]
----------------------
Here are the parameters for the Linear Regression Model that was trained using the gradient-descent algorithm:
[[ 4.24839284e+01]
 [ 1.24674724e+01]
 [ 9.28203242e+00]
 [ 4.05618631e+00]
 [-3.53150438e+00]
 [ 2.58544175e+00]
 [ 1.01908141e+00]
 [ 1.10966289e+00]
 [ 1.76761257e+01]
 [ 4.21444080e-02]
 [-8.58659765e-01]
 [-9.22643545e-01]
 [ 8.53814141e-01]
 [-1.44183752e+00]
 [ 5.67553620e-01]
 [-1.14814882e+00]
 [-3.17008424e+00]]
----------------------
Here are the differences in the model's paramters: gradientDescentModel's parameters- closed-form solution model's parameters:
[[ 0.11657313]
 [-0.00853949]
 

In [93]:
# Mean squared error of each fitted model on the held-out split
# (closed-form model first, gradient-descent model second).
print("MSE for closed-form solution model on validation dataset:",
      testModel1.computeMSE(ConcreteStrengthXTest,ConcreteStrengthYTest),
      sep="\n")
print("MSE for gradient descent model on validation dataset:",
      testModel2.computeMSE(ConcreteStrengthXTest,ConcreteStrengthYTest),
      sep="\n")

MSE for closed-form solution model on validation dataset:
66.66735236167872
MSE for gradient descent model on validation dataset:
67.04971600813015


In [94]:
# Mean absolute error of each fitted model on the held-out split.
print("MAE for closed-form solution model on validation dataset:",
      testModel1.computeMAE(ConcreteStrengthXTest,ConcreteStrengthYTest),
      sep="\n")
print("MAE for gradient descent model on validation dataset:",
      testModel2.computeMAE(ConcreteStrengthXTest,ConcreteStrengthYTest),
      sep="\n")

MAE for closed-form solution model on validation dataset:
6.3927804450547665
MAE for gradient descent model on validation dataset:
6.413939376443549


In [95]:
# Sanity check: mean squared error of each model on the data it was trained on.
print("MSE for closed-form solution model on training dataset:",
      testModel1.computeMSE(ConcreteStrengthX,ConcreteStrengthY),
      sep="\n")
print("MSE for gradient descent model on training dataset:",
      testModel2.computeMSE(ConcreteStrengthX,ConcreteStrengthY),
      sep="\n")

MSE for closed-form solution model on training dataset:
62.28269156991075
MSE for gradient descent model on training dataset:
63.45772112080617


In [96]:
# Sanity check: mean absolute error of each model on the data it was trained on.
print("MAE for closed-form solution model on training dataset:",
      testModel1.computeMAE(ConcreteStrengthX,ConcreteStrengthY),
      sep="\n")
print("MAE for gradient descent model on training dataset:",
      testModel2.computeMAE(ConcreteStrengthX,ConcreteStrengthY),
      sep="\n")

MAE for closed-form solution model on training dataset:
5.980865058610757
MAE for gradient descent model on training dataset:
6.046420206544249
