In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [2]:
# --- helper functions ---
def batch_iter(n_samples, batch_size, rng):
    while True:
        idx = np.arange(n_samples)
        rng.shuffle(idx)
        for start in range(0, n_samples, batch_size):
            yield idx[start:start + batch_size]

class MiniBatchLogReg:
    def __init__(self, dim, rng=None):
        self.rng = np.random.default_rng() if rng is None else rng
        self.w = self.rng.normal(scale=0.01, size=dim)

    @staticmethod
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def loss_grad(self, X, y):
        z = X @ self.w
        preds = self.sigmoid(z)
        eps = 1e-12
        loss = -np.mean(y*np.log(preds+eps) + (1-y)*np.log(1-preds+eps))
        grad = X.T @ (preds - y) / len(y)
        return loss, grad

    def fit(self, X, y, batch_size=32, lr=0.01, max_iters=5000, verbose=False):
        n = len(y)
        rng = self.rng
        biter = batch_iter(n, batch_size, rng)
        for it in range(max_iters):
            idx = next(biter)
            loss, grad = self.loss_grad(X[idx], y[idx])
            self.w -= lr * grad
            if verbose and it % 1000 == 0:
                print(f"iter {it} loss {loss:.4f}")

    def predict_proba(self, X):
        return self.sigmoid(X @ self.w)

    def predict(self, X):
        return (self.predict_proba(X) >= 0.5).astype(int)

In [3]:
# (a) load and preprocess dataset ---
X, y = load_breast_cancer(return_X_y=True)

# (b) train-val-test split
# stratified train (60%) temp (40%)
X_train, X_tmp, y_train, y_tmp = train_test_split(
    X, y, test_size=0.4, stratify=y, random_state=42)

# split temp into val and test (each 20% total)
X_val, X_test, y_val, y_test = train_test_split(
    X_tmp, y_tmp, test_size=0.5, stratify=y_tmp, random_state=42)

# standardize features based on training set
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# (c) - class counts
train_counts = np.bincount(y_train)
val_counts = np.bincount(y_val)
test_counts = np.bincount(y_test)

print("Class counts:")
print("Train:", train_counts)
print("Val:", val_counts)
print("Test:", test_counts)


Class counts:
Train: [127 214]
Val: [42 72]
Test: [43 71]


In [4]:
# hyperparameter grid
learning_rates = [1e-1, 1e-2, 1e-3]
batch_sizes = [16, 64, 128]

results = []
for lr in learning_rates:
    for bs in batch_sizes:
        model = MiniBatchLogReg(dim=X_train.shape[1], rng=np.random.default_rng(0))
        model.fit(X_train, y_train, batch_size=bs, lr=lr, max_iters=5000, verbose=False)

        # evaluate on train and test
        for split_name, X_split, y_split in [
            ("train", X_train, y_train),
            ("test", X_test, y_test),
        ]:
            y_pred = model.predict(X_split)
            acc = accuracy_score(y_split, y_pred)
            prec, rec, f1, _ = precision_recall_fscore_support(
                y_split, y_pred, average='binary', zero_division=0)
            results.append({
                "lr": lr,
                "batch_size": bs,
                "split": split_name,
                "accuracy": acc,
                "precision": prec,
                "recall": rec,
                "f1": f1
            })

results_df = pd.DataFrame(results)
display(results_df)

Unnamed: 0,lr,batch_size,split,accuracy,precision,recall,f1
0,0.1,16,train,0.985337,0.981567,0.995327,0.988399
1,0.1,16,test,0.982456,0.985915,0.985915,0.985915
2,0.1,64,train,0.985337,0.981567,0.995327,0.988399
3,0.1,64,test,0.982456,0.985915,0.985915,0.985915
4,0.1,128,train,0.985337,0.981567,0.995327,0.988399
5,0.1,128,test,0.982456,0.985915,0.985915,0.985915
6,0.01,16,train,0.985337,0.981567,0.995327,0.988399
7,0.01,16,test,0.973684,0.985714,0.971831,0.978723
8,0.01,64,train,0.985337,0.981567,0.995327,0.988399
9,0.01,64,test,0.973684,0.985714,0.971831,0.978723


Comment:
- The largest learning rate 1e-1 generalizes the test set the best. 
- The batch size does not effect the final result a lot. 