In [None]:
"""
Notebook for logistic regression
"""

In [None]:
import pandas as pd
import numpy as np
from logistic_cost_function import logistic_cost_function
from logistic_cost_function import sigmoid
from logistic_gradient_descent import logistic_gradient_descent

In [None]:
# Read the three dataset splits (train / cross-validation / test) from their
# Excel workbooks, in that order. Paths are relative to the notebook's
# working directory.
train_data, cv_data, test_data = [
    pd.read_excel(workbook)
    for workbook in ('train_data.xlsx', 'cv_data.xlsx', 'test_data.xlsx')
]

In [None]:
# Split every dataset into its feature columns (X) and its label column (y).
def _split_features_target(frame):
    """Return (features, target) for one split.

    `features` is a copy of `frame` without 'target_column'; `target` is the
    'target_column' column itself. `frame` is left unmodified.
    """
    features = frame.drop(columns='target_column')
    target = frame['target_column']
    return features, target


X_train, y_train = _split_features_target(train_data)
X_cv, y_cv = _split_features_target(cv_data)
X_test, y_test = _split_features_target(test_data)

In [None]:
# Prepend an all-ones column to each feature matrix so that the first
# parameter acts as the bias (intercept) term. The results are plain NumPy
# arrays.
# NOTE: each X_* is rebuilt from itself, so re-running this cell on its own
# stacks one more ones column; re-run the cell that creates X_* first.
X_train, X_cv, X_test = (
    np.hstack((np.ones((len(features), 1)), features))
    for features in (X_train, X_cv, X_test)
)

In [None]:
# Start from an all-zero parameter vector: one entry per column of the
# training matrix.
n_params = X_train.shape[1]
theta = np.zeros(n_params)

In [None]:
# Hyperparameters handed to logistic_gradient_descent.
# Presumably alpha is the step size (learning rate) and num_iterations the
# number of update steps — confirm against logistic_gradient_descent.
num_iterations = 1_000
alpha = 1e-2

In [None]:
# Train the model using gradient descent.
# theta is both the starting value passed in and the name the result is bound
# to, so re-running this cell on its own feeds the previously returned theta
# back in (presumably continuing from it rather than restarting — confirm
# against logistic_gradient_descent). Re-run the initialization cell first
# for a fresh fit.
theta = logistic_gradient_descent(X_train, y_train, theta, alpha, num_iterations)

In [None]:
# Model outputs for the training, cross-validation and test splits:
# sigmoid applied to the linear score (design matrix · theta) of each split.
y_train_pred, y_cv_pred, y_test_pred = [
    sigmoid(np.dot(design, theta))
    for design in (X_train, X_cv, X_test)
]

In [None]:
# Evaluate the cost of the predictions on each split (train, cv, test — in
# that order), pairing every label vector with its matching predictions.
cost_train, cost_cv, cost_test = map(
    logistic_cost_function,
    (y_train, y_cv, y_test),
    (y_train_pred, y_cv_pred, y_test_pred),
)

# Report the fitted parameters and the three costs, one per line.
print(
    f"Final theta: {theta}",
    f"Cost on training set: {cost_train}",
    f"Cost on cross-validation set: {cost_cv}",
    f"Cost on test set: {cost_test}",
    sep="\n",
)