In [2]:
import torch
import polars as pl
import numpy as np
import csv
from enum import Enum


Here we define the hyperparameters, global constants, and configuration options used by the rest of the notebook.

In [37]:
# HYPERPARAMETERS
# Step size that gradient_descent multiplies each gradient by when updating w and b.
LEARNING_RATE = 0.005
# Initial values for the model's weight and bias parameters.
INITIAL_WEIGHT = 0
INITIAL_BIAS = 0

# The header row below gives the 0-based position of each name in the list.
# NOTE: Enum's functional API numbers members starting at 1, so
# COLUMNS.PassengerId.value == 1 (not 0) and COLUMNS.Embarked.value == 11.
#                          |     0      |     1   |    2  |   3  |   4  |    5   |    6   |     7   |    8  |    9   |      10   |
COLUMNS = Enum('COLUMNS', ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'])

# CONFIG
# File name of the training CSV; read_and_format_data prefixes it with "data/".
DATASET_NAME = "train.csv"
# MUST BE ONE OF THESE VALUES: PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
# NOTE(review): the value assigned here is an integer rather than one of the
# names listed above, and the run cell sets column = "Pclass" directly instead
# of reading this constant — confirm the intended type and wire it up.
TRAINING_COLUMN = 0

This helper reads a CSV file from the `data/` directory into a Polars DataFrame and returns it.

In [38]:
def read_and_format_data(filename):
    """Load a CSV file that lives under the ``data/`` directory.

    Args:
        filename: File name to read; it is appended to the ``"data/"`` prefix.

    Returns:
        The object produced by ``pl.read_csv`` for that path.
    """
    csv_path = "data/" + filename
    return pl.read_csv(csv_path)

In [39]:
def compute_cost(x, y, w, b):
    """Return the halved mean squared error of the line ``w * x + b``.

    The cost is ``1 / (2 * m) * sum((w * x[i] + b - y[i]) ** 2)`` taken over
    the ``m`` samples.

    Args:
        x: Sized iterable of input feature values.
        y: Sized iterable of target values, one per element of ``x``.
        w: Weight (slope) of the model.
        b: Bias (intercept) of the model.

    Returns:
        The cost for the given parameters.

    Raises:
        ValueError: If ``x`` is empty, or if ``x`` and ``y`` differ in length.
    """
    m = len(x)
    # Fail early with a clear message instead of a ZeroDivisionError below.
    if m == 0:
        raise ValueError("compute_cost requires at least one sample")
    # A shorter y would raise IndexError mid-loop and a longer y would be
    # silently truncated, so reject mismatched inputs up front.
    if len(y) != m:
        raise ValueError(
            f"x and y must have the same length (got {m} and {len(y)})"
        )

    cost = 0
    for x_i, y_i in zip(x, y):
        f_wb = w * x_i + b
        cost = cost + (f_wb - y_i) ** 2

    # Same expression order as before so results stay bit-for-bit identical.
    return 1 / (2 * m) * cost

def compute_gradient(x, y, w, b):
    """Return the gradient of the halved mean squared error cost.

    For the model ``w * x + b`` the two partial derivatives are the averages,
    over all ``m`` samples, of ``(pred - y[i]) * x[i]`` (with respect to ``w``)
    and ``(pred - y[i])`` (with respect to ``b``).

    Args:
        x: Sized iterable of input feature values.
        y: Sized iterable of target values, one per element of ``x``.
        w: Current weight (slope).
        b: Current bias (intercept).

    Returns:
        A tuple ``(w_g, d_g)``: the gradient with respect to ``w`` and the
        gradient with respect to ``b``.

    Raises:
        ValueError: If ``x`` is empty, or if ``x`` and ``y`` differ in length.
    """
    m = len(x)
    # Fail early with a clear message instead of a ZeroDivisionError below.
    if m == 0:
        raise ValueError("compute_gradient requires at least one sample")
    # A shorter y would raise IndexError mid-loop and a longer y would be
    # silently truncated, so reject mismatched inputs up front.
    if len(y) != m:
        raise ValueError(
            f"x and y must have the same length (got {m} and {len(y)})"
        )

    w_g = 0
    d_g = 0
    for x_i, y_i in zip(x, y):
        error = (w * x_i + b) - y_i
        w_g += error * x_i
        d_g += error

    # Average the accumulated per-sample contributions.
    w_g /= m
    d_g /= m
    return w_g, d_g

def gradient_descent(x, y, w_initial, b_initial, num_epochs,
                     learning_rate=None, log_every=10):
    """Fit ``w`` and ``b`` of the model ``w * x + b`` by batch gradient descent.

    Each epoch computes the gradient over all samples with
    ``compute_gradient`` and moves both parameters one step against it.

    Args:
        x: Input feature values.
        y: Target values, one per element of ``x``.
        w_initial: Starting weight.
        b_initial: Starting bias.
        num_epochs: Number of update steps to perform.
        learning_rate: Step size. When ``None`` (the default) the module-level
            ``LEARNING_RATE`` is read at call time, so editing the config cell
            takes effect without redefining this function.
        log_every: Print the loss every this many epochs (default 10). Pass
            ``0`` or ``None`` to disable logging.

    Returns:
        A tuple ``(w, b)`` with the fitted weight and bias.
    """
    if learning_rate is None:
        learning_rate = LEARNING_RATE

    w = w_initial
    b = b_initial
    for epoch in range(num_epochs):
        w_g, d_g = compute_gradient(x, y, w, b)
        w = w - learning_rate * w_g
        b = b - learning_rate * d_g

        # The loss is reported for the parameters *after* this epoch's update.
        if log_every and epoch % log_every == 0:
            loss = compute_cost(x, y, w, b)
            print(f"Run: {epoch} Loss: {loss}")
    return w, b

Here we train the model on the training data with gradient descent and print its predictions for the test data.

In [40]:
# Load the training data (DATASET_NAME) and the test data from data/.
# NOTE(review): `input` shadows the Python builtin of the same name. The name is
# kept because cells outside this view may still reference it; prefer renaming
# it (e.g. to `train_df`) once every user can be updated together.
input = read_and_format_data(DATASET_NAME)
target = read_and_format_data("test.csv")
column = "Pclass"

# Extract each column once as a plain Python list instead of re-indexing the
# DataFrame for every single element.
x = input[column].to_list()
y = input["Survived"].to_list()
test_input = target[column].to_list()

# Fit the single-feature linear model, starting from the configured initial
# weight and bias rather than repeating their values as literals.
w, b = gradient_descent(x, y, INITIAL_WEIGHT, INITIAL_BIAS, 10000)

# Apply the fitted line to every test value.
test_output = [w * value + b for value in test_input]
print(test_output)


Run: 0 Loss: 0.18844119660200134
Run: 10 Loss: 0.1642418756994117
Run: 20 Loss: 0.15210268065444796
Run: 30 Loss: 0.1459242799053586
Run: 40 Loss: 0.14269294582492908
Run: 50 Loss: 0.14091956283254234
Run: 60 Loss: 0.13986833373301794
Run: 70 Loss: 0.13917567977765635
Run: 80 Loss: 0.13866193733506377
Run: 90 Loss: 0.1382383163015593
Run: 100 Loss: 0.1378609277989062
Run: 110 Loss: 0.13750806903515927
Run: 120 Loss: 0.13716899964255003
Run: 130 Loss: 0.13683839600989198
Run: 140 Loss: 0.1365136111396433
Run: 150 Loss: 0.13619332072832752
Run: 160 Loss: 0.13587685418661188
Run: 170 Loss: 0.13556386409144314
Run: 180 Loss: 0.13525416285913097
Run: 190 Loss: 0.1349476420430703
Run: 200 Loss: 0.13464423245678572
Run: 210 Loss: 0.1343438844689364
Run: 220 Loss: 0.1340465582654075
Run: 230 Loss: 0.13375221903620768
Run: 240 Loss: 0.133460834595777
Run: 250 Loss: 0.13317237420560807
Run: 260 Loss: 0.13288680799101377
Run: 270 Loss: 0.13260410665145678
Run: 280 Loss: 0.13232424131593534
Run: 2