# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Load datasets
# Read the feature matrix and the labels from CSV and keep them as NumPy arrays.
# NOTE(review): read_csv infers a header from the first line by default; if
# these files contain data rows only, the first sample is consumed as column
# names -- confirm against the files.
X = pd.read_csv("logisticX.csv").to_numpy()
y = pd.read_csv("logisticY.csv").to_numpy()

# Sigmoid function
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The textbook expression overflows inside ``exp`` for large negative
    ``z``. Here the exponential is only ever taken of a non-positive number,
    so it stays in (0, 1] and cannot overflow.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``z`` (a NumPy scalar when
        ``z`` is a scalar).
    """
    z = np.asarray(z)
    # exp(-|z|) equals exp(-z) for z >= 0 and exp(z) for z < 0.
    decay = np.exp(-np.abs(z))
    # 1 / (1 + e^-z) for z >= 0; the algebraically equal e^z / (1 + e^z)
    # for z < 0.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays unchanged.
    return result[()]

# Compute cost function
def compute_cost(X, y, theta):
    """Return the mean binary cross-entropy of a logistic model.

    The cost is evaluated from the linear scores ``z = X @ theta`` as
    ``mean(log(1 + exp(z)) - y * z)``. This is algebraically identical to
    ``-mean(y * log(h) + (1 - y) * log(1 - h))`` with ``h = sigmoid(z)``,
    but never takes ``log(0)``, so the result stays finite when the
    predicted probabilities saturate at 0 or 1.

    Args:
        X: Design matrix of shape (m, n), including any intercept column.
        y: The m target labels (0 or 1), as a flat or a column array.
        theta: Parameter vector of shape (n,) or (n, 1).

    Returns:
        The average cost over the m samples.

    Raises:
        ZeroDivisionError: If ``y`` is empty.
    """
    m = len(y)
    z = np.asarray(X) @ np.asarray(theta)
    labels = np.asarray(y)
    # A flat label vector against column-shaped scores (or vice versa) would
    # broadcast to an (m, m) matrix and inflate the cost, so align the
    # shapes whenever the element counts match.
    if labels.size == z.size:
        labels = labels.reshape(z.shape)
    # logaddexp(0, z) is log(1 + exp(z)) evaluated without overflow.
    per_sample = np.logaddexp(0, z) - labels * z
    return (1 / m) * np.sum(per_sample)

# Gradient descent for logistic regression
def gradient_descent(X, y, theta, alpha, num_iters):
    """Fit logistic-regression parameters by batch gradient descent.

    Args:
        X: Design matrix of shape (m, n), including any intercept column.
        y: The m target labels (0 or 1), shaped like ``X @ theta``.
        theta: Initial parameter vector. It is not modified.
        alpha: Learning rate (step size of each update).
        num_iters: Number of update steps to perform.

    Returns:
        A tuple ``(theta, cost_history)``: the learned parameters as a new
        float array with the shape of the input ``theta``, and a list with
        the cost evaluated after each update.
    """
    m = len(y)
    # Iterate on a private float copy: the in-place update below would
    # otherwise overwrite the caller's starting vector, and NumPy rejects
    # it outright for an integer-typed one.
    theta = np.array(theta, dtype=float)
    cost_history = []

    for _ in range(num_iters):
        # Gradient of the mean cross-entropy with respect to theta.
        gradient = (1 / m) * (X.T @ (sigmoid(X @ theta) - y))
        theta -= alpha * gradient
        cost_history.append(compute_cost(X, y, theta))

    return theta, cost_history

# Standardize the features, then prepend a column of ones for the intercept
scaler = StandardScaler()
X = scaler.fit_transform(X)
m, n = X.shape
X = np.hstack((np.ones((m, 1)), X))

# All-zero starting point (one weight per column of X) and hyperparameters
theta = np.zeros((n + 1, 1))
alpha = 0.1
num_iters = 1000

# Fit the model with batch gradient descent
final_theta, cost_history = gradient_descent(
    X, y, theta, alpha=alpha, num_iters=num_iters
)

# Report the cost at the fitted parameters and the parameters themselves
final_cost = compute_cost(X, y, final_theta)
print("Final Cost Function Value:", final_cost)
print("Learned Parameters:", final_theta.ravel())

# Scatter the two scaled features, colored by class label
plt.scatter(X[:, 1], X[:, 2], c=y.flatten(), cmap='bwr', edgecolors='k')

# The boundary is the line theta0 + theta1 * x1 + theta2 * x2 = 0;
# solve it for x2 at the smallest and largest observed x1.
first_feature = X[:, 1]
x_vals = np.array([first_feature.min(), first_feature.max()])
intercept, weight_1, weight_2 = final_theta[0], final_theta[1], final_theta[2]
y_vals = -(intercept + weight_1 * x_vals) / weight_2

plt.plot(x_vals, y_vals, 'k', label='Decision Boundary')
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend()
plt.show()

# Plot cost function vs iteration.
# The x-axis is derived from the recorded history itself rather than from the
# global num_iters, which is reassigned further down; that keeps x and y the
# same length even if this cell is re-run after the later reassignment.
plt.figure()
plt.plot(range(len(cost_history)), cost_history, 'b-', linewidth=2)
plt.xlabel("Number of Iterations")
plt.ylabel("Cost Function Value")
plt.title("Cost Function vs Iterations")
plt.grid(True)
plt.show()

# Compare convergence for two learning rates over a short run
num_iters = 100  # 100 update steps per run

# Each run starts from its own all-zero parameter vector.
# Learning rate 0.1
theta_1, cost_history_1 = gradient_descent(
    X, y, np.zeros((n + 1, 1)), alpha=0.1, num_iters=num_iters
)
# Learning rate 5 (fifty times larger)
theta_2, cost_history_2 = gradient_descent(
    X, y, np.zeros((n + 1, 1)), alpha=5, num_iters=num_iters
)

# Overlay both cost curves on a single figure
iterations = range(num_iters)
plt.figure()
plt.plot(iterations, cost_history_1, 'b-', label='Alpha = 0.1', linewidth=2)
plt.plot(iterations, cost_history_2, 'r-', label='Alpha = 5', linewidth=2)
plt.title("Cost Function vs Iterations for Different Learning Rates")
plt.xlabel("Number of Iterations")
plt.ylabel("Cost Function Value")
plt.legend()
plt.grid(True)
plt.show()

# Classify every sample with the fitted parameters: the positive class is
# predicted when the estimated probability is at least 0.5 (boolean mask).
probabilities = sigmoid(X @ final_theta)
y_pred = probabilities >= 0.5

# Score the predictions against the true labels
conf_matrix = confusion_matrix(y, y_pred)
accuracy = accuracy_score(y, y_pred)
precision = precision_score(y, y_pred)
recall = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)

# Report the confusion matrix followed by the summary metrics
print("Confusion Matrix:\n", conf_matrix)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
