## Imports

In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler

## Loading and Preprocessing MNIST Dataset

In [2]:
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# Keep only the samples labelled '0' or '1'. The comparison is done on the
# string labels; the surviving labels are converted to integers afterwards.
is_zero_or_one = (mnist.target == '0') | (mnist.target == '1')
X = mnist.data[is_zero_or_one]
Y = mnist.target[is_zero_or_one].astype(int)

# Standardize each feature column using statistics fitted on this subset.
# NOTE(review): the scaler is fitted before any train/validation split, so the
# cross-validation folds built later share scaling statistics -- confirm this
# is acceptable for the reported accuracies.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

## Defining the Sigmoid Activation Function

In [3]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))`` applied element-wise.

    The input is first limited to the range [-500, 500] so that ``np.exp``
    never overflows; values outside that range saturate.
    """
    bounded = np.clip(z, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

## Defining the Accuracy Metric

In [4]:
def accuracy(y_true, y_pred):
    """Fraction of samples whose thresholded prediction equals the true label.

    ``y_pred`` holds scores that are turned into hard labels with a 0.5
    cut-off (a score of exactly 0.5 counts as class 1) before being compared
    element-wise with ``y_true``.
    """
    hard_labels = np.where(y_pred >= 0.5, 1, 0)
    is_correct = hard_labels == y_true
    return np.mean(is_correct)

## Implementing the Logistic Regression Algorithm

In [5]:
def logistic_regression(X, Y, learning_rate, epochs=1000, seed=42, tol=1e-8):
    """Fit a binary logistic-regression model with full-batch gradient descent.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training features.
    Y : array-like, n_samples entries
        Binary labels (0 or 1). Column vectors are flattened so that the
        residual ``phiZ - Y`` cannot silently broadcast to an (n, n) matrix.
    learning_rate : float
        Step size of every gradient-descent update.
    epochs : int, default 1000
        Maximum number of full passes over the training data.
    seed : int or None, default 42
        Seed of the private random generator that draws the initial weights
        and bias. The default reproduces the draws previously obtained via
        ``np.random.seed(42)`` without re-seeding the global NumPy RNG.
    tol : float, default 1e-8
        Training stops early once the cross-entropy loss is at or below this
        value.

    Returns
    -------
    (w, b) : tuple
        Weight vector of shape (n_features,) and scalar bias.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, the training set is empty, or ``X`` and ``Y``
        disagree on the number of samples.
    """
    X = np.asarray(X)
    Y = np.asarray(Y, dtype=float).ravel()

    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (n_samples, n_features), got ndim={X.ndim}")
    n_samples = X.shape[0]
    if n_samples == 0:
        raise ValueError("Cannot fit logistic regression on an empty training set")
    if Y.shape[0] != n_samples:
        raise ValueError(
            f"X and Y disagree on the number of samples: {n_samples} vs {Y.shape[0]}"
        )

    # A private legacy RandomState yields the same stream as the global
    # np.random.seed/np.random.randn pair, but leaves the caller's global RNG
    # state untouched.
    rng = np.random.RandomState(seed)
    # Initializing Weights and Bias with Random Values
    w = rng.randn(X.shape[1])
    b = rng.randn()

    for _ in range(epochs):
        Z = np.dot(X, w) + b
        phiZ = sigmoid(Z)
        # Clipping phiZ to avoid log(0) issues
        clipped_PhiZ = np.clip(phiZ, 1e-15, 1 - 1e-15)
        # Cross Entropy Loss Function (evaluated with the pre-update parameters)
        Loss = np.mean((-Y * np.log(clipped_PhiZ)) - ((1 - Y) * np.log(1 - clipped_PhiZ)))
        residual = phiZ - Y
        # Gradient Descent Update for Weights
        w = w - (learning_rate * (residual @ X) / n_samples)
        # Gradient Descent Update for Bias
        b = b - learning_rate * np.mean(residual)
        # Early Stopping Condition
        if Loss <= tol:
            break
    return w, b

## K-Fold Cross Validation Split

In [6]:
def k_fold_split(X, Y, k=10, seed=None):
    """Shuffle the sample indices and partition them into ``k`` validation folds.

    Every sample lands in exactly one validation fold. When ``len(X)`` is not
    a multiple of ``k`` the first ``len(X) % k`` folds receive one extra
    sample, so no sample is left permanently in the training side.

    Parameters
    ----------
    X : sequence
        Only its length is used, to determine the number of samples.
    Y : sequence
        Not used by the split; kept in the signature for existing callers.
    k : int, default 10
        Number of folds; must satisfy ``2 <= k <= len(X)``.
    seed : int or None, default None
        When given, the shuffle uses a private generator seeded with this
        value. When ``None`` the global NumPy RNG is used, as before.

    Returns
    -------
    list of (train_idx, val_idx)
        One pair of index arrays per fold. ``train_idx`` is sorted
        (it comes from ``np.setdiff1d``); ``val_idx`` keeps the shuffled order.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 2 or larger than the number of samples.
    """
    n_samples = len(X)
    if k < 2 or k > n_samples:
        raise ValueError(
            f"k must satisfy 2 <= k <= number of samples ({n_samples}), got k={k}"
        )

    indices = np.arange(n_samples)
    if seed is None:
        np.random.shuffle(indices)
    else:
        np.random.RandomState(seed).shuffle(indices)

    folds = []
    # array_split spreads the remainder over the leading folds instead of
    # dropping it, and matches plain equal-size slicing when k divides n.
    for val_idx in np.array_split(indices, k):
        train_idx = np.setdiff1d(indices, val_idx)
        folds.append((train_idx, val_idx))
    return folds

## Evaluating the Model with Cross-Validation

In [7]:
def evaluate_model(X, Y, learning_rate, folds):
    """Mean validation accuracy of logistic regression across the given folds.

    For each ``(train_idx, val_idx)`` pair in ``folds`` a fresh model is
    trained on the training rows with ``learning_rate`` and scored on the
    validation rows; the per-fold accuracies are then averaged.
    """

    def _score_fold(train_idx, val_idx):
        # Slice out both partitions before training starts.
        train_features, train_labels = X[train_idx], Y[train_idx]
        val_features, val_labels = X[val_idx], Y[val_idx]

        # Fit on the training partition only.
        weights, bias = logistic_regression(train_features, train_labels, learning_rate)

        # Predicted probabilities for the held-out partition.
        val_probabilities = sigmoid(np.dot(val_features, weights) + bias)
        return accuracy(val_labels, val_probabilities)

    fold_accuracies = [_score_fold(train_idx, val_idx) for train_idx, val_idx in folds]
    return np.mean(fold_accuracies)

In [8]:
# Using different learning rates to evaluate the model's performance
learning_rates = [0.001, 0.01, 0.1, 0.0001]

# Seed the global NumPy RNG before building the folds: k_fold_split shuffles
# with np.random, so without a seed the fold membership (and therefore the
# accuracies printed below) would change on every Restart & Run All.
np.random.seed(42)
# The same folds are reused for every learning rate so the scores are comparable.
folds = k_fold_split(X, Y, k=10)

results = []
for lr in learning_rates:
    avg_acc = evaluate_model(X, Y, lr, folds)
    results.append(avg_acc)
    print(f"Learning Rate: {lr} -> Average Accuracy over Folds: {avg_acc:.4f}")

Learning Rate: 0.001 -> Average Accuracy over Folds: 0.9461
Learning Rate: 0.01 -> Average Accuracy over Folds: 0.9838
Learning Rate: 0.1 -> Average Accuracy over Folds: 0.9933
Learning Rate: 0.0001 -> Average Accuracy over Folds: 0.9162
