In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import plotly.express as px

In [None]:
def normalize(data):
    """Standardize every column of ``data`` except the last one, in place.

    Each processed column is shifted to zero mean and divided by its
    standard deviation (``np.std`` default, i.e. the population value).
    The last column is left untouched.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; it is modified in place.

    Returns
    -------
    None
    """
    for i in range(data.shape[1] - 1):
        column = data[:, i]
        centered = column - np.mean(column)
        spread = np.std(column)
        # A constant column has zero spread; dividing would turn it into NaN
        # (0/0), so such a column is only centered (it becomes all zeros).
        data[:, i] = centered / spread if spread > 0 else centered

In [None]:
def load_data(filename):
    """Read a header-less, semicolon-separated CSV file and split it for fitting.

    The file content is converted to a float array and passed through
    ``normalize`` before it is split.

    Parameters
    ----------
    filename : str or path-like
        Location of the CSV file.

    Returns
    -------
    tuple
        ``(features, target)``: every column but the last, and the last column.
    """
    frame = pd.read_csv(filename, sep=";", index_col=False, header=None)
    table = np.array(frame, dtype=float)

    # normalize works in place on the array.
    normalize(table)

    features = table[:, :-1]
    target = table[:, -1]
    return features, target

In [None]:
# Read the feature matrix and the target values from the CSV file.
x, y = load_data("data/housePrices.csv")

# Turn the targets into a column vector with one row per observation.
y = y.reshape(len(y), 1)

# Prepend a column of ones so that the first beta (B0) acts as the intercept.
intercept_column = np.ones((x.shape[0], 1))
x = np.concatenate((intercept_column, x), axis=1)

# One regression parameter (beta) per column of x, all starting at zero.
# The betas are the model. They live in a one-element list so that the
# update step can swap in a new array through betas[0].
betas = [np.zeros((x.shape[1], 1))]
print(betas)

# Cost values recorded during training.
cost = []

In [None]:
def prediction(x, coefficients=None):
    """Return the linear-model output ``x @ beta`` as a column of predictions.

    Parameters
    ----------
    x : array-like
        Either a 2-D matrix with one observation per row, or a single
        observation given as a 1-D vector.
    coefficients : array-like, optional
        Parameters to use. When omitted, the notebook-level ``betas`` is used.
        Both a bare parameter vector and a one-element list wrapping it
        (the form ``betas`` is stored in) are accepted.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(rows, 1)``; a single 1-D observation gives ``(1, 1)``.
    """
    if coefficients is None:
        coefficients = betas

    # betas is kept as a one-element list, which numpy would otherwise treat
    # as a 3-D stack of matrices; collapse it to a plain (n, 1) column.
    weights = np.asarray(coefficients).reshape(-1, 1)

    # Promote a single observation to one row so the result is always 2-D.
    return np.matmul(np.atleast_2d(x), weights)

In [None]:
def cost_function(x, y):
    """Return the halved mean squared error of the current model on ``(x, y)``.

    Computes ``sum((prediction(x) - y) ** 2) / (2 * m)`` where ``m`` is the
    number of rows in ``y``.

    Parameters
    ----------
    x : numpy.ndarray
        Matrix of observations handed to ``prediction``.
    y : numpy.ndarray
        Observed targets, one per row of ``x``.

    Returns
    -------
    float
        The cost as a plain scalar.
    """
    # Flatten both sides so the subtraction is element-wise whatever
    # singleton axes the prediction or the targets carry; a transpose-based
    # product breaks as soon as the residual is not exactly 2-D.
    residuals = np.ravel(prediction(x)) - np.ravel(y)
    return float(residuals @ residuals) / (2 * y.shape[0])

In [None]:
def updateBetas(eventTuple, timePassed, learning_rate=0.1):
    """Perform one gradient-descent step using a single observation.

    Reads and replaces the notebook-level ``betas[0]`` and appends the cost
    over the full data set ``x``/``y``, measured before the step, to ``cost``.

    Parameters
    ----------
    eventTuple : array-like
        One observation: the intercept term followed by the feature values.
    timePassed : array-like
        Observed target value belonging to that observation.
    learning_rate : float, optional
        Step size applied to the gradient.

    Returns
    -------
    None
        The model is updated through ``betas[0]``.
    """
    # Model output for this observation with the betas as they are right now.
    estimate = prediction(eventTuple)
    print('predicted house price: (should be 0 at first iteration) ', estimate)

    # How far the estimate is off; expected to look like [[value]].
    residual = estimate - timePassed

    # The observation as a column: [[1], [x_feature1], [x_feature2], ...].
    feature_column = np.reshape(eventTuple, (len(eventTuple), 1))

    # One gradient entry per beta: [[B0], [B1], [B2], ...].
    gradient = np.matmul(feature_column, residual)

    # Record the cost of the model before it is changed.
    cost.append(cost_function(x, y))

    # Move the betas against the gradient.
    step = learning_rate * gradient
    betas[0] = betas[0] - step

In [None]:
# Run the model: one pass over the data, one update step per observation.
# A plain loop is used because updateBetas is called only for its side
# effects; a list comprehension would just collect its None results.
for eventTuple, timePassed in zip(x, y):
    updateBetas(eventTuple, timePassed, learning_rate=0.1)

print(betas)

In [None]:
# Plot the recorded cost against the update step. Every entry is collapsed to
# a plain float first, so scalars and single-element arrays plot the same way.
cost_values = [float(np.squeeze(value)) for value in cost]
fig = px.scatter(y=cost_values)
fig.update_layout(
    title="Cost per update step",
    xaxis_title="update step",
    yaxis_title="cost",
)
fig.show()