In [12]:
import pandas as pd
import numpy as np

In [13]:
def predict(w, b, X):
    z = np.dot(X, w) + b
    probs = 1 / (1 + np.exp(-z))
    return (probs >= 0.5).astype(int)

def predict_prop( w, b, X):
    z = np.dot(X, w) + b
    sigmoid = 1 / (1 + np.exp(-z))
    return sigmoid

def compute_cost(w, b, X, y):
    m = X.shape[0]
    f_wb = predict_prop(w, b, X)
    cost = -1/m * np.sum(y * np.log(f_wb) + (1 - y) * np.log(1 - f_wb))
    return cost

def compute_gradient(w, b, X, y):
    m = X.shape[0]
    f_wb = predict_prop(w, b, X)
    
    dj_dw = 1/m * np.dot(X.T, (f_wb - y))
    dj_db = 1/m * np.sum(f_wb - y)
    
    return dj_dw, dj_db

def gradient_descent(lr, x, y, w, b, previous_cost, threshold, max_iters=1000000):
    m = x.shape[0]
    iters = 0
    cost_log = []
    iteration_log = []
    weight_log = []
    bias_log = []

    while True:
        
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        w -= (lr/m) * dj_dw
        b -= (lr/m) * dj_db

        current_cost = compute_cost(x, y, w, b)

        if iters % 100 == 0:
            print(f"Iteration {iters} | Cost: {current_cost:.5f} | w: {w.ravel()} | b: {b:.5f}")
        #     if iters < 1001 and iters % 200 == 0 and iters > 1:
        #         cost_log.append(current_cost)
        #         iteration_log.append(iters)
        #         weight_log.append(w.flatten().tolist())  # Save as list
        #         bias_log.append(b)
        #     elif iters < 10000 and iters > 1001 and iters % 1000 == 0:
        #         cost_log.append(current_cost)
        #         iteration_log.append(iters)
        #         weight_log.append(w.flatten().tolist())  # Save as list
        #         bias_log.append(b)
        
        # if iters % 10000 == 0:
        #     cost_log.append(current_cost)
        #     iteration_log.append(iters)
        #     weight_log.append(w.flatten().tolist())  # Save as list
        #     bias_log.append(b)
            
        if abs(current_cost-previous_cost) < threshold:
            print(f"Converged in {iters} iterations.")
            break

        previous_cost = current_cost
        iters += 1
        if iters >= max_iters:
            print("Stopped: Max iterations reached.")
            break


    return w , b #,cost_log , iteration_log , weight_log , bias_log

In [14]:
# Load data
df = pd.read_csv("cleaned_titanic_data.csv")

# Separate features (X) and target (y)
X = df.drop('Survived', axis=1).values
y = df['Survived'].values.reshape(-1, 1)


# Step 1: Shuffle the data
np.random.seed(42)  # for reproducibility
indices = np.arange(X.shape[0])
np.random.shuffle(indices)

X = X[indices]
y = y[indices]

# Step 2: 80/20 split
split_index = int(0.8 * X.shape[0])
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Step 3: Z-score normalization (standardization) for X
#mean = np.mean(X_train, axis=0)
#std = np.std(X_train, axis=0)

#X_train_scaled = (X_train - mean) / std
#X_test_scaled = (X_test - mean) / std  # use train mean & std!

# Step 4: Initialize parameters
m, n = X_train.shape
w = np.random.uniform(low=-1.0, high=1.0, size=(n, 1))
b = 0.0

In [15]:
wf , bf = gradient_descent(0.02, X_train, y_train, w, b, previous_cost=compute_cost(w, b, X_train, y_train), threshold=1e-5, max_iters=1000000)

  cost = -1/m * np.sum(y * np.log(f_wb) + (1 - y) * np.log(1 - f_wb))
  cost = -1/m * np.sum(y * np.log(f_wb) + (1 - y) * np.log(1 - f_wb))


ValueError: shapes (10,1) and (705,10) not aligned: 1 (dim 1) != 705 (dim 0)