In [5]:
#Question 1

import numpy as np
import matplotlib.pyplot as plt

#Sigmoid function
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    The textbook expression calls exp(-x), which overflows (and emits a
    RuntimeWarning) once x is a large negative number. This version only
    ever exponentiates a non-positive value.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid, as a NumPy scalar for scalar input and an
        ndarray of the same shape for array input.
    """
    x = np.asarray(x, dtype=float)

    # exp of a non-positive number always lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(x))

    # x >= 0: 1 / (1 + e^-x)   -- the textbook form, already safe on this side.
    # x <  0: e^x / (1 + e^x)  -- the same value, rearranged to avoid exp(+big).
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with () converts a 0-d result back to a NumPy scalar and leaves
    # real arrays untouched, so scalar input still yields a scalar.
    return result[()]
 
def compute_cost(X, y, theta):
    """Mean binary cross-entropy (negative log-likelihood) of a logistic model.

    The loss is computed directly from the linear scores z = X @ theta using
    the identities

        log(sigmoid(z))     = -logaddexp(0, -z)
        log(1 - sigmoid(z)) = -logaddexp(0,  z)

    so no epsilon is needed to guard against log(0), and a confidently wrong
    prediction is charged its true loss instead of being capped near
    -log(epsilon).

    Args:
        X: Design matrix with one row per sample.
        y: 0/1 labels with one entry per sample (the Question 1 script passes
            an (m, 1) column).
        theta: Parameter vector that X is multiplied by.

    Returns:
        The average cost over the len(y) samples, as a plain Python float.

    Raises:
        ZeroDivisionError: If y is empty.
    """
    m = len(y)
    z = X @ theta

    # Per-sample penalties for the positive and the negative label.
    penalty_if_one = np.logaddexp(0, -z)   # -log(h)
    penalty_if_zero = np.logaddexp(0, z)   # -log(1 - h)

    total = y.T @ penalty_if_one + (1 - y).T @ penalty_if_zero

    # `total` is a 1x1 array when y and theta are columns. Squeeze it to 0-d
    # first: calling float() on an array with ndim > 0 is deprecated in NumPy.
    return float(np.squeeze(total)) / m

#Gradient descent
def gradient_descent(X, y, theta, alpha, iterations):
    """Fit logistic-regression parameters with batch gradient descent.

    Args:
        X: Design matrix with one row per sample.
        y: Labels, one per sample.
        theta: Starting parameters. The name is rebound on every step, so the
            array passed in by the caller is not modified.
        alpha: Learning rate (step size).
        iterations: Number of update steps to take.

    Returns:
        A tuple (theta, cost_history): the parameters after the last step and
        a list holding the cost measured after each update.
    """
    n_samples = len(y)
    costs = []

    for _ in range(iterations):
        # Difference between predicted probability and label for every sample.
        residual = sigmoid(X @ theta) - y

        # Average gradient of the cost with respect to theta.
        slope = (1 / n_samples) * (X.T @ residual)

        # Step against the gradient, then record the cost at the new position.
        theta = theta - alpha * slope
        costs.append(compute_cost(X, y, theta))

    return theta, costs

#Predict function
def predict(X, theta):
    """Classify each row of X as 0 or 1 using the fitted parameters.

    A row is labelled 1 when its predicted probability, sigmoid(X @ theta),
    is at least 0.5, and 0 otherwise.

    Returns:
        An integer array of 0/1 labels shaped like X @ theta.
    """
    probabilities = sigmoid(X @ theta)
    is_positive = probabilities >= 0.5
    return is_positive.astype(int)

#Calculate accuracy
def accuracy(y_true, y_pred):
    """Percentage of predictions that match the true labels.

    Both inputs are flattened before comparison. Without that, comparing an
    (m, 1) column with an (m,) vector broadcasts to an (m, m) grid and yields
    a meaningless percentage, and two Python lists compare as a single bool.

    Args:
        y_true: True labels (array-like, any shape).
        y_pred: Predicted labels (array-like) with the same number of entries.

    Returns:
        The share of matching entries, scaled to the range 0-100.

    Raises:
        ValueError: If the two inputs hold a different number of labels.
    """
    actual = np.asarray(y_true).ravel()
    guessed = np.asarray(y_pred).ravel()

    if actual.size != guessed.size:
        raise ValueError(
            f"y_true has {actual.size} labels but y_pred has {guessed.size}"
        )

    return np.mean(actual == guessed) * 100

#Make dataset 

# Seed the global NumPy generator so the sampled points are reproducible.
np.random.seed(1)
m = 100

# Two Gaussian blobs of m // 2 points each. Class 0 is drawn first, centred on
# (-2, -2); class 1 is drawn second, centred on (2, 2).
X_class0 = np.array([-2, -2]) + np.random.randn(m // 2, 2)
y_class0 = np.zeros((m // 2, 1))
X_class1 = np.array([2, 2]) + np.random.randn(m // 2, 2)
y_class1 = np.ones((m // 2, 1))

# Stack the two classes row-wise into one feature matrix and one label column.
X = np.concatenate([X_class0, X_class1], axis=0)
y = np.concatenate([y_class0, y_class1], axis=0)

# A leading column of ones lets theta[0] act as the intercept.
X_with_bias = np.concatenate([np.ones((m, 1)), X], axis=1)

# Training setup: start from all-zero parameters.
theta = np.zeros((X_with_bias.shape[1], 1))
alpha = 0.1
iterations = 5000

theta_final, cost_history = gradient_descent(X_with_bias, y, theta, alpha, iterations)

# Score the fitted model on the data it was trained on.
y_pred = predict(X_with_bias, theta_final)
acc = accuracy(y, y_pred)
print(f"Model Accuracy: {acc:.2f}%")


Model Accuracy: 100.00%


  return float(cost)


In [9]:
#Question 2

import numpy as np

def normal_equation(X, y):
    """Solve least-squares linear regression in closed form.

    Uses the Moore-Penrose pseudoinverse, theta = pinv(X) @ y, in place of
    the explicit (X^T X)^-1 X^T y product.

    Args:
        X: Design matrix as a nested list or array, one row per sample.
        y: Target values, one per sample.

    Returns:
        The fitted coefficients as a flat Python list, rounded to 4 decimals.
    """
    design = np.array(X)
    targets = np.array(y).reshape(-1, 1)

    coefficients = np.linalg.pinv(design) @ targets

    return np.round(coefficients.flatten(), 4).tolist()

#Test with example
# Each row of X is [1, x]; the leading 1 is the intercept term.
X = [[1, 1], [1, 2], [1, 3]]
y = [1, 2, 3]
coefficients = normal_equation(X, y)

# Echo the inputs and the fitted coefficients, one item per line.
print(
    f"Input X: {X}",
    f"Input y: {y}",
    f"Output coefficients: {coefficients}",
    sep="\n",
)


Input X: [[1, 1], [1, 2], [1, 3]]
Input y: [1, 2, 3]
Output coefficients: [0.0, 1.0]


In [None]:
#Question 3

#Can't use the same one as q1 since it uses sigmoid 
def gradient_descent(X, y, alpha, iterations):
    """Fit linear-regression coefficients with batch gradient descent.

    NOTE(review): this definition reuses the name of the logistic-regression
    `gradient_descent(X, y, theta, alpha, iterations)` from Question 1 and
    replaces it once this cell has run; re-run the Question 1 cell before
    calling the five-argument version again.

    Args:
        X: Design matrix (array or nested list), one row per sample. Include a
            column of ones if an intercept is wanted.
        y: Target values (array or list), one per sample.
        alpha: Learning rate (step size).
        iterations: Number of update steps to take.

    Returns:
        A 1-D NumPy array of coefficients, rounded to 4 decimals. Training
        always starts from all-zero coefficients.
    """
    # Coerce up front so plain Python lists work, as they do for the
    # normal-equation solver; array inputs are handled exactly as before.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)

    m = len(y)
    n = X.shape[1]
    theta = np.zeros((n, 1))

    for _ in range(iterations):
        # Residuals of the current fit.
        errors = X @ theta - y

        # Average gradient of the squared-error cost, then step downhill.
        gradient = (1 / m) * (X.T @ errors)
        theta = theta - alpha * gradient

    return np.round(theta.flatten(), 4)

#Test with example
# Each row of X is [1, x]; the leading 1 is the intercept term.
X = np.array([[1, 1], [1, 2], [1, 3]])
y = np.array([1, 2, 3])

# Hyperparameters for the run.
alpha = 0.01
iterations = 1000

coefficients = gradient_descent(X, y, alpha, iterations)

# Echo the inputs, the settings and the fitted coefficients, one per line.
print(
    f"Input X: {X}",
    f"Input y: {y}",
    f"Alpha: {alpha}",
    f"Iterations: {iterations}",
    f"Output coefficients: {coefficients}",
    sep="\n",
)

Input X: [[1 1]
 [1 2]
 [1 3]]
Input y: [1 2 3]
Alpha: 0.01
Iterations: 1000
Output coefficients: [0.1107 0.9513]


Report: 

One thing I didn't realize coming in was how much easier linear regression is to code than logistic regression. The gradient descent for logistic regression, with its sigmoid and log-likelihood cost function, is still simple, but it ended up noticeably longer than either the normal equation or gradient descent for linear regression. I think this stems from the cost function: in linear regression you can use the prediction error directly as your evaluation metric. The coding principles used here are basic: plain functions, matrix operations with NumPy, and a faithful implementation of the formulas for each model. Implementation was straightforward; I followed the gradient descent steps as described and used simple matrix operations to update the model on each iteration. The main lesson was learning how to implement both linear and logistic regression from scratch, without scikit-learn or a similar library; although I have used linear regression many times in the past, I had never built it from scratch.