In [74]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_validate
import sklearn.datasets as datasets
import pandas as pd
import numpy as np
import random

In [75]:
calif_housing = datasets.fetch_california_housing(as_frame = True)

In [76]:
x_train, x_test, y_train, y_test = train_test_split(calif_housing['data'],calif_housing['target'])

In [97]:
def getModelPredictions(x,weights):
    #time complexity is O(n*k), where n is number of datapoints, is number of values per input
    
    ans = []
    for i in range(len(x)):
        result = 0
        for j in range(len(x.columns)):
            result += x.iat[i, j] * weights[j]
        ans.append(result)
    return ans

In [100]:
def getLeastSquaredError(dataAnswers, modelPredictions) -> float:
    error = 0
    for i in range(len(dataAnswers)):
        error += (dataAnswers.iat[i] - modelPredictions[i])
    return error

In [90]:
def getLeastSquaresGradient(x_train,y_train,predictions,weight,weight_index):
    #takes in model predictions, weight to get the gradient of, and the index of that weight within "weights"
    ans_sum = 0
    gradient = 0
    x_col = x_train.iloc[:,weight_index]
    for i in range(len(y_train)):
        ans_sum += (y_train.iat[i] - predictions[i]) * x_col.iat[i]
    gradient = ans_sum * -2
    return gradient

In [80]:
def updateWeights(weights, x_train, y_train, learningRate):
    predictions = getModelPredictions(x_train, weights)
    for i in range(len(weights)):
        gradient = getLeastSquaresGradient(x_train, y_train, predictions, weights[i],i)
        weights[i] -= learningRate * gradient
    return weights

In [81]:
def trainModel(x_train, y_train, learningRate = 0.01, numIterations = 30):
    weights = [1] * len(x_train.columns)
    #Initialize the weights to random numbers
    for i in range(len(weights)):
        weights[i] = random.randint(0,100)
    for i in range(numIterations):
        weights = updateWeights(weights, x_train, y_train, learningRate)
    return weights        
    

In [82]:
def testModel(weights,x,y):
    predictions = getModelPredictions(x,weights)
    error = getLeastSquaredError(y, predictions)
    print("The error is " + str(error))

In [92]:
model = trainModel(x_train,y_train)

In [None]:
predictions = getModelPredictions(x_test,model)

In [101]:
testModel(model,x_test,y_test)

The error is -3.724231981020282e+248


In [None]:
y_train.head()

In [None]:
x_test.head()

In [None]:
y_test.head()

In [None]:
print(y_train)
print(y_train.iloc[0])

In [None]:
print(len(x_train))
print(len(y_train))