In [1]:
import numpy as np
import pandas as pd
from math import sqrt
from sklearn.model_selection import train_test_split

In [2]:
# Load the regression dataset (path is relative to the working directory).
# Later cells read its MONTH, DAY, YEAR, HOLIDAY and COUNT columns.
dataFrame = pd.read_csv("MultiVariableRegressionDataset.csv")

In [3]:
# Add a column of ones; it is selected as the first feature below, so the
# weight that multiplies it acts as the intercept term of the linear model.
dataFrame['CONSTANT'] = 1

In [4]:
# Inputs: the constant (intercept) column followed by the MONTH, DAY, YEAR and HOLIDAY columns.
featureDframe = dataFrame.loc[:, ['CONSTANT', 'MONTH', 'DAY', 'YEAR', 'HOLIDAY']]
# Target: the COUNT column the model is fitted against.
outputDframe = dataFrame.loc[:, 'COUNT']

In [5]:
# Convert the pandas objects to plain NumPy arrays, which the dot-product
# based helper functions in the following cells operate on.
featureMatrix = featureDframe.to_numpy()
outputMatrix = outputDframe.to_numpy()

In [6]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
xTrain, xTest, yTrain, yTest = train_test_split(
    featureMatrix,
    outputMatrix,
    test_size=0.2,
    random_state=42,
)

In [7]:
def getRsme(predictedOutput, actualOutput):
    """Return the root-mean-square error between predictions and observations.

    Both arguments must support element-wise subtraction with each other
    (e.g. NumPy arrays of the same shape).
    """
    residuals = predictedOutput - actualOutput
    return np.sqrt(np.square(residuals).mean())

In [8]:
def predictOutput(featureMatrix, weights):
    """Return the linear-model predictions: the dot product of the features with the weights.

    featureMatrix holds one row per observation; weights holds one entry per
    feature column.
    """
    predictions = np.dot(featureMatrix, weights)
    return predictions

In [9]:
def featureDerivative(errors, feature):
    """Return twice the dot product of the errors with one feature column.

    This is the partial derivative of the sum of squared errors with respect
    to the weight attached to that feature.
    """
    errorFeatureProduct = np.dot(errors, feature)
    return errorFeatureProduct * 2

In [10]:
def gradientDescent(featureMatrix, outputMatrix, initialWeights, stepSize, tolerance, maxIterations=None):
    """Fit linear-regression weights by batch gradient descent on the sum of squared errors.

    Every iteration computes the prediction errors for the current weights,
    the gradient ``2 * errors . featureMatrix`` and moves all weights by
    ``-stepSize * gradient``. The loop stops as soon as the magnitude of the
    gradient used for the update is below ``tolerance``.

    Parameters
    ----------
    featureMatrix : array-like, shape (observations, features)
        One row per observation, one column per weight.
    outputMatrix : array-like, shape (observations,)
        Observed output for each row of ``featureMatrix``.
    initialWeights : array-like, shape (features,)
        Starting weights. They are copied (and converted to float), so the
        caller's object is never modified.
    stepSize : float
        Multiplier applied to the gradient in every update.
    tolerance : float
        Convergence threshold on the gradient magnitude.
    maxIterations : int or None, optional
        Upper bound on the number of updates. ``None`` (the default) keeps
        iterating until the tolerance is met, as before.

    Returns
    -------
    numpy.ndarray
        The float weights after the last update. If ``maxIterations`` is
        reached first, these are the weights obtained so far.

    Raises
    ------
    FloatingPointError
        If the gradient magnitude becomes NaN or infinite, which means the
        iteration has diverged and could otherwise never terminate.
    """
    featureMatrix = np.asarray(featureMatrix)
    outputMatrix = np.asarray(outputMatrix)
    # Force a float copy: with integer starting weights the updates would
    # otherwise be truncated back to integers on assignment.
    weights = np.array(initialWeights, dtype=float)
    iteration = 0
    while maxIterations is None or iteration < maxIterations:
        errors = np.dot(featureMatrix, weights) - outputMatrix
        # All partial derivatives at once instead of one Python-level pass per column.
        gradient = 2 * np.dot(errors, featureMatrix)
        weights -= stepSize * gradient
        gradientMagnitude = float(np.linalg.norm(gradient))
        iteration += 1
        # Periodic progress report so long runs show they are still advancing.
        if iteration % 500000 == 0:
            print(f"GM: {gradientMagnitude} | T: {tolerance}")
        if not np.isfinite(gradientMagnitude):
            raise FloatingPointError(
                f"gradient descent diverged after {iteration} iterations; try a smaller stepSize"
            )
        if gradientMagnitude < tolerance:
            break
    return weights

In [None]:
# Starting point, step size and stopping threshold for the gradient-descent run.
initialWeights = np.array([-150., 0., 0., 0., 0.])  # one weight per feature column, constant column first
stepSize = 4e-12
tolerance = 1  # stop once the gradient magnitude drops below this value
# NOTE(review): the recorded output below still shows a gradient magnitude above
# 12000 after dozens of progress lines (one is printed per 500000 iterations), so
# this cell runs for a very long time. Presumably the unscaled feature columns are
# what force such a tiny step size — confirm before relying on these settings.
weights = gradientDescent(xTrain, yTrain, initialWeights, stepSize, tolerance)
print(weights)

GM: 84770.15740595221 | T: 1
GM: 76667.9659998354 | T: 1
GM: 70651.65939160605 | T: 1
GM: 65848.23807127637 | T: 1
GM: 61772.33793023534 | T: 1
GM: 58160.347991914954 | T: 1
GM: 54869.67961437347 | T: 1
GM: 51822.06755967885 | T: 1
GM: 48973.002152828754 | T: 1
GM: 46295.636071590736 | T: 1
GM: 43772.41748153486 | T: 1
GM: 41390.76599783959 | T: 1
GM: 39140.84161962743 | T: 1
GM: 37014.39116662197 | T: 1
GM: 35004.14795124948 | T: 1
GM: 33103.515271855584 | T: 1
GM: 31306.395614677956 | T: 1
GM: 29607.094846716314 | T: 1
GM: 28000.265204224135 | T: 1
GM: 26480.868547023743 | T: 1
GM: 25044.150383585376 | T: 1
GM: 23685.619791785397 | T: 1
GM: 22401.032722674423 | T: 1
GM: 21186.377383119325 | T: 1
GM: 20037.861011865065 | T: 1
GM: 18951.897680818398 | T: 1
GM: 17925.096916320046 | T: 1
GM: 16954.25301949998 | T: 1
GM: 16036.33500868305 | T: 1
GM: 15168.477130125633 | T: 1
GM: 14347.969895944783 | T: 1
GM: 13572.2516154067 | T: 1
GM: 12838.900390059242 | T: 1
GM: 12145.626546038704 | T:

In [None]:
# Apply the fitted weights to the held-out feature rows.
predictions = predictOutput(xTest, weights)

In [None]:
# Root-mean-square error of the predictions on the held-out set.
print(getRsme(predictions, yTest))

In [None]:
# List every held-out prediction beside its true value; the DATE field shows
# the feature row without its leading constant column.
for i, (predicted, actual) in enumerate(zip(predictions, yTest)):
    print(f"PREDICTED: {predicted} | ACTUAL: {actual} | DIFFERENCE: {abs(predicted - actual)} | DATE: {xTest[i][1:]}")