## Question 3

In [11]:
import numpy as np

class LogisticRegression:
    def __init__(self, learning_rate=0.01, batch_size=32, max_iters=1000):
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.max_iters = max_iters
        self.weights = None
        self.bias = None
    
    def sigmoid(self, z):
        """Sigmoid function to map any real value to (0, 1)"""
        return 1 / (1 + np.exp(-z))
    
    def loss(self, y_true, y_pred):
       
        m = len(y_true)
        return - (1/m) * np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    
    def initialize_weights(self, n_features):
        """Initialize the weights and bias"""
        self.weights = np.zeros(n_features)
        self.bias = 0
    
    def forward(self, X):
        """Forward pass: compute predictions using sigmoid"""
        linear_model = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(linear_model)
        return y_pred
    
    def compute_gradient(self, X, y_true, y_pred):
        """Compute the gradients for weights and bias"""
        m = X.shape[0]  # Batch size
        dw = (1/m) * np.dot(X.T, (y_pred - y_true))  # Gradient for weights
        db = (1/m) * np.sum(y_pred - y_true)         # Gradient for bias
        return dw, db
    
    def update_weights(self, dw, db):
        """Update weights using gradient descent"""
        self.weights -= self.learning_rate * dw
        self.bias -= self.learning_rate * db
    
    def train(self, X, y):
        """Training the logistic regression model using mini-batch SGD"""
        n_samples, n_features = X.shape
        self.initialize_weights(n_features)
        
        for i in range(self.max_iters):
            # Shuffle the data
            indices = np.random.permutation(n_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]
            
            # Mini-batch training
            for start in range(0, n_samples, self.batch_size):
                end = start + self.batch_size
                X_batch = X_shuffled[start:end]
                y_batch = y_shuffled[start:end]
                
                # Forward pass
                y_pred = self.forward(X_batch)
                
                # Compute gradients
                dw, db = self.compute_gradient(X_batch, y_batch, y_pred)
                
                # Update weights and bias
                self.update_weights(dw, db)
    
    def predict(self, X):
        """Make predictions on new data"""
        y_pred = self.forward(X)
        y_pred_class = [1 if i > 0.5 else 0 for i in y_pred]
        return np.array(y_pred_class)


## Question 4

### (a)

In [12]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load Wisconsin Breast Cancer dataset from scikit-learn
data = load_breast_cancer()
X = data.data
y = data.target

### (b)

In [13]:

# Split the dataset into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

### (c)

In [14]:
class_counts_train_val = np.bincount(np.concatenate([y_train, y_val]))
print(f"Class distribution in training (+ validation) set: {class_counts_train_val}")

Class distribution in training (+ validation) set: [196 316]


### (d,e)

In [15]:

model1 = LogisticRegression(learning_rate=0.01, batch_size=16, max_iters=1000)
model1.train(X_train, y_train)


y_pred1 = model1.predict(X_test)


accuracy = accuracy_score(y_test, y_pred1)
precision = precision_score(y_test, y_pred1)
recall = recall_score(y_test, y_pred1)
f1 = f1_score(y_test, y_pred1)

accuracy = accuracy_score(y_test, y_pred1)
precision = precision_score(y_test, y_pred1)
recall = recall_score(y_test, y_pred1)
f1 = f1_score(y_test, y_pred1)
print(f"Test Set Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

model2 = LogisticRegression(learning_rate=0.001, batch_size=8, max_iters=1000)
model2.train(X_train, y_train)


y_pred2 = model2.predict(X_test)


accuracy = accuracy_score(y_test, y_pred2)
precision = precision_score(y_test, y_pred2)
recall = recall_score(y_test, y_pred2)
f1 = f1_score(y_test, y_pred2)
print(f"Test Set Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

model3 = LogisticRegression(learning_rate=0.0001, batch_size=32, max_iters=1000)
model3.train(X_train, y_train)


y_pred3 = model3.predict(X_test)


accuracy = accuracy_score(y_test, y_pred3)
precision = precision_score(y_test, y_pred3)
recall = recall_score(y_test, y_pred3)
f1 = f1_score(y_test, y_pred3)
print(f"Test Set Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

  return 1 / (1 + np.exp(-z))


Test Set Performance:
Accuracy: 0.9474
Precision: 1.0000
Recall: 0.9268
F1 Score: 0.9620


  return 1 / (1 + np.exp(-z))


Test Set Performance:
Accuracy: 0.9123
Precision: 1.0000
Recall: 0.8780
F1 Score: 0.9351
Test Set Performance:
Accuracy: 0.8246
Precision: 1.0000
Recall: 0.7561
F1 Score: 0.8611


Based on the accuracy scores from using the test set on each of the 3 mode3ls, it appears that a batch size of 32 is the best gradient descent method, which makes intiutive sense because the more information you feed the model, in terms of data points to compute gradients, the more accurate the gradient descent will be. However, if the bath size goes too high, then the computational cost can become too costly. 