In [13]:
import numpy as np
from keras.datasets import mnist

# Dividing the data into training and testing
(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()

X = X_train_full[:10000].reshape(-1, 28*28)
y = y_train_full[:10000]

mean = X.mean(axis=0)
std = X.std(axis=0) + 1e-10 
X = (X - mean) / std

In [14]:
# Creating the K-Fold
k = 10
fold_size = len(X) // k
folds_X = [X[i*fold_size:(i+1)*fold_size] for i in range(k)]
folds_y = [y[i*fold_size:(i+1)*fold_size] for i in range(k)]

In [17]:
# Train and validate the model with different learning rates 
learning_rates = [0.1, 0.01, 0.001, 0.0001]
for lr in learning_rates:
    accuracy = []
    for i in range(k):
        X_val = folds_X[i]
        y_val = folds_y[i]
        X_train = np.concatenate(folds_X[:i] + folds_X[i+1:], axis=0)
        y_train = np.concatenate(folds_y[:i] + folds_y[i+1:], axis=0)

        w = np.random.randn(784, 10)
        b = np.zeros(10)

        # Train the logistic regression model
        for epoch in range(50):
            for j in range(len(X_train)):
                z = X_train[j] @ w + b

                exp_z = np.exp(z)
                sum_exp_z = np.sum(exp_z)
                p = exp_z / sum_exp_z

                dL_dp = p
                dL_dp[y_train[j]] -= 1
                dp_dz = np.diag(p) - np.outer(p, p)
                dz_dw = X_train[j]
                dz_db = 1
                dL_dw = dz_dw.reshape(-1, 1) @ (dL_dp @ dp_dz).reshape(1, -1)
                dL_db = (dL_dp @ dp_dz) * dz_db

                w -= lr * dL_dw
                b -= lr * dL_db

        # Validate the model
        z_val = X_val @ w + b
        y_pred = np.argmax(z_val, axis=1)
        acc = np.mean(y_pred == y_val)
        accuracy.append(acc)

    # Print out the accuracy for each learning rate
    mean_acc = np.mean(accuracy)
    std_acc = np.std(accuracy)
    print(f"Learning rate: {lr}, Accuracy: {mean_acc:.3f}")

  exp_z = np.exp(z)
  p = exp_z / sum_exp_z


KeyboardInterrupt: ignored