In [4]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Load training data
# skiprows=1 drops the first line of each CSV and header=None stops pandas
# from promoting the next line to column names, so every remaining row is data.
X_train_df = pd.read_csv(
    './Training Data/Logistic_X_Train.csv',
    header=None,
    skiprows=1,
)
Y_train_df = pd.read_csv(
    './Training Data/Logistic_Y_Train.csv',
    header=None,
    skiprows=1,
)

# Plain NumPy arrays for the hand-written model code.
X_train = X_train_df.to_numpy()            # 2-D, one row per sample; expected (n_samples, 3) — TODO confirm
Y_train = Y_train_df.to_numpy().flatten()  # collapsed to 1-D, one label per sample

In [6]:
# Load test data
# Same read options as the training files: skip the first line and treat
# everything after it as data rows without column names.
X_test_df = pd.read_csv(
    './Testing Data/Logistic_X_Test.csv',
    header=None,
    skiprows=1,
)
X_test = X_test_df.to_numpy()  # 2-D array, one row per test sample

In [7]:
# Initialize weights and bias
# A seeded generator makes the random starting point reproducible, so the
# trained parameters and the written predictions are the same on every
# Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)
W = 2 * rng.random(X_train.shape[1])  # one weight per feature column, uniform in [0, 2)
b = 5 * rng.random()                  # scalar bias, uniform in [0, 5)

In [8]:
# Logistic regression functions
def sigmoid(h):
    """Numerically stable logistic function ``1 / (1 + exp(-h))``.

    Parameters
    ----------
    h : float or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Elementwise sigmoid of ``h``: a scalar for scalar input, otherwise
        an array with the same shape as ``h``.
    """
    h = np.asarray(h, dtype=float)
    # exp() is only ever evaluated at a non-positive argument, so it lies in
    # [0, 1] and cannot overflow (the naive form computes exp(+large) for
    # very negative h, which overflows to inf and emits a RuntimeWarning).
    decay = np.exp(-np.abs(h))
    # h >= 0:  1 / (1 + e^-h)
    # h <  0:  e^h / (1 + e^h)   (algebraically the same value)
    result = np.where(h >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]

def hypothesis(x, w, b):
    """Model output for input ``x``: the sigmoid of the linear score.

    Parameters
    ----------
    x : array-like
        Feature values; combined with ``w`` through ``np.dot``.
    w : array-like
        Weights.
    b : float
        Bias added to the dot product.

    Returns
    -------
    The value of ``sigmoid(np.dot(x, w) + b)``.
    """
    linear_score = np.dot(x, w) + b
    return sigmoid(linear_score)

def error(y, x, w, b):
    """Mean binary cross-entropy of the model on ``(x, y)``.

    Logarithms are base 2, so the loss is expressed in bits.

    Parameters
    ----------
    y : array-like
        Labels, one per row of ``x``.
    x : numpy.ndarray
        2-D feature matrix, one row per sample.
    w : numpy.ndarray
        Weights.
    b : float
        Bias.

    Returns
    -------
    numpy.float64
        ``-mean(y * log2(hx) + (1 - y) * log2(1 - hx))`` where ``hx`` is the
        model output for each row.

    Raises
    ------
    ValueError
        If ``x`` has no rows (the mean would be undefined).
    """
    m = x.shape[0]
    if m == 0:
        raise ValueError("error() requires at least one sample")

    labels = np.asarray(y, dtype=float).reshape(-1)
    hx = hypothesis(x, w, b)  # evaluated for all rows at once

    # When the sigmoid saturates, hx is exactly 0.0 or 1.0; log2(0) is -inf
    # and 0 * -inf is nan, which would poison the whole loss. Clipping keeps
    # both logarithms finite.
    eps = np.finfo(float).eps
    hx = np.clip(hx, eps, 1.0 - eps)

    log_likelihood = labels * np.log2(hx) + (1.0 - labels) * np.log2(1.0 - hx)
    return -np.sum(log_likelihood) / m

def get_grad(x, w, b, y):
    """Average update direction for the weights and the bias.

    The returned values are the means of ``(y - hx) * x`` and ``(y - hx)``
    over all samples, where ``hx`` is the model output. The caller moves
    the parameters by *adding* a multiple of them.

    Parameters
    ----------
    x : numpy.ndarray
        2-D feature matrix, one row per sample.
    w : numpy.ndarray
        Weights.
    b : float
        Bias.
    y : array-like
        Labels, one per row of ``x``.

    Returns
    -------
    grad_w : numpy.ndarray
        One entry per feature column.
    grad_b : numpy.float64
        Scalar term for the bias.

    Raises
    ------
    ValueError
        If ``x`` has no rows (the average would be undefined).
    """
    m = x.shape[0]
    if m == 0:
        raise ValueError("get_grad() requires at least one sample")

    # Residual for every sample in one shot instead of a Python-level loop.
    residual = np.asarray(y, dtype=float).reshape(-1) - hypothesis(x, w, b)

    # dot(residual, x) sums residual[i] * x[i] over the samples.
    grad_w = np.dot(residual, x) / m
    grad_b = np.sum(residual) / m
    return grad_w, grad_b

def gradient_descent(x, y, w, b, learning_rate=0.01):
    """Perform a single parameter update.

    Parameters
    ----------
    x, y : training features and labels, forwarded to ``error`` and ``get_grad``.
    w, b : current weights and bias (not modified in place).
    learning_rate : float, default 0.01
        Multiplier applied to the gradients before they are added.

    Returns
    -------
    tuple
        ``(err, w, b)``: the loss measured *before* the update, followed by
        the updated weights and bias.
    """
    loss_before = error(y, x, w, b)
    dw, db = get_grad(x, w, b, y)
    # get_grad is built from (y - hx) terms, so the step is taken by adding
    # the scaled gradients to the current parameters.
    updated_w = w + learning_rate * dw
    updated_b = b + learning_rate * db
    return loss_before, updated_w, updated_b

def predict(x, w, b):
    """Class label(s) obtained by thresholding the model output at 0.5.

    Parameters
    ----------
    x : array-like
        A single sample (1-D) or a batch of samples (2-D, one row each).
    w : array-like
        Weights.
    b : float
        Bias.

    Returns
    -------
    int or numpy.ndarray
        For a single sample, ``1`` if the model output is ``>= 0.5`` and
        ``0`` otherwise. For a batch, an integer array with one such label
        per row.
    """
    proba = hypothesis(x, w, b)
    if np.ndim(proba) == 0:
        # Single sample: keep returning a plain Python int.
        return 1 if proba >= 0.5 else 0
    # Batch: threshold elementwise instead of failing on an ambiguous
    # array truth value.
    return (proba >= 0.5).astype(int)

In [None]:
# Train the model
# 1000 full-batch updates; each call hands back the loss measured before
# that update, which is recorded so the training curve can be inspected.
loss = []
for epoch in range(1000):
    step_loss, W, b = gradient_descent(X_train, Y_train, W, b, learning_rate=0.1)
    loss.append(step_loss)

In [None]:
# Predict on test data
# Classify each test row with the trained parameters, then write the labels
# to a single-column CSV without the index.
test_labels = [predict(sample, W, b) for sample in X_test]
Y_pred = np.array(test_labels)
submission_df = pd.DataFrame({'label': Y_pred})
submission_df.to_csv('Y_pred.csv', index=False)