## Data Preprocessing

In [29]:

import numpy as np
import pandas as pd
import math

data = pd.read_csv('titanic.csv')
data = data.sample(frac=1)
x = data[["Age", "Pclass", "Fare"]].to_numpy()
y = data["Survived"].to_numpy()
datalen = len(x)
split_percent = 0.10    #10% used for testing data 
split = math.floor(datalen*split_percent)

#no need to shuffle, data already randomized in the .csv file
x_test = x[:split].T
x_train = x[split:].T
y_test = y[:split]
y_train = y[split:]

#print(x_test)


## 

In [33]:

#hyperparams
epochs = 1000
learning_rate = 0.003

#init our weight vector based on how many features we have in the data
weights = np.zeros(x.shape[1])
bias = 0

def sigmoid(X):
    return 1/(1+np.exp(-X))

def forward(X, weights, bias):
    return sigmoid(np.dot(weights.T, X) + bias)

def loss(Y, A):
    return Y*np.log(A) + (1-Y)*np.log(1-A)

def cost(Y, A):
    return -1/len(A) * np.sum(loss(Y, A))

#partial derivitive of cost with respect to weights
def dCostWRTw(X, A, Y):
    return np.dot(X, (A-Y).T)/len(A)

#partial derivitive of cost with respect to bias
def dCostWRTb(A, Y):
    return np.sum(A-Y)/len(A)

# Compute A
# Convert the entries of a into 0 (if activation <= 0.5) or 1 (if activation > 0.5) and store the predictions in a vector.
def predict(X, weights, bias):
    A = forward(X, weights, bias)
    p = np.copy(A)
    p[p <= 0.5] = 0
    p[p > 0.5] = 1
    return p 

def accuracy(predictions, labels):
    return np.count_nonzero(predictions==labels)/len(predictions)

#training loop
for epoch in range(epochs):
    A = forward(x_train, weights, bias)
    c = cost(y_train, A)
    dw = dCostWRTw(x_train, A, y_train)
    db = dCostWRTb(A, y_train)
    weights = weights - learning_rate * dw
    bias = bias - learning_rate * db
    predictions = predict(x_train, weights, bias)
    acc = accuracy(predictions, y_train)
    print(f"Epoch: {epoch}, Cost: {c}, Acc: {acc}")




Epoch: 0, Cost: 0.6931471805599453, Acc: 548
Epoch: 1, Cost: 0.6501453837350775, Acc: 516
Epoch: 2, Cost: 0.6386961536980923, Acc: 540
Epoch: 3, Cost: 0.6342806706556493, Acc: 524
Epoch: 4, Cost: 0.6309478734909855, Acc: 537
Epoch: 5, Cost: 0.6295340092534442, Acc: 526
Epoch: 6, Cost: 0.6285800418254359, Acc: 534
Epoch: 7, Cost: 0.6281733430339681, Acc: 530
Epoch: 8, Cost: 0.6279479375511194, Acc: 531
Epoch: 9, Cost: 0.627828164672903, Acc: 530
Epoch: 10, Cost: 0.627748617195476, Acc: 530
Epoch: 11, Cost: 0.6276875484023479, Acc: 530
Epoch: 12, Cost: 0.6276341677834227, Acc: 530
Epoch: 13, Cost: 0.627584260824078, Acc: 530
Epoch: 14, Cost: 0.6275359670088333, Acc: 530
Epoch: 15, Cost: 0.6274884910351931, Acc: 530
Epoch: 16, Cost: 0.6274414748492855, Acc: 530
Epoch: 17, Cost: 0.6273947541294937, Acc: 530
Epoch: 18, Cost: 0.6273482510986615, Acc: 530
Epoch: 19, Cost: 0.6273019280864264, Acc: 530
Epoch: 20, Cost: 0.6272557664311899, Acc: 529
Epoch: 21, Cost: 0.6272097566688468, Acc: 529
E