In [1]:
from keras.datasets import mnist
import numpy as np
from matplotlib.pyplot import plot as plt
import sys

In [2]:
def standardize(x):
    """Rescale ``x`` to zero mean and unit standard deviation.

    The mean and standard deviation are taken over *all* elements of ``x``
    (no per-axis statistics), so the whole array is shifted and scaled by a
    single pair of scalars.

    Parameters
    ----------
    x : array_like
        Numeric data of any shape.

    Returns
    -------
    numpy.ndarray
        ``(x - mean(x)) / std(x)``. When every element of ``x`` is identical
        (standard deviation of exactly 0) an array of zeros is returned
        instead of the NaNs a division by zero would produce.
    """
    x = np.asarray(x)
    centered = x - np.mean(x)
    spread = np.std(x)
    if spread == 0:
        # Constant input: every centered value is already 0, and dividing by
        # a zero standard deviation would turn them all into NaN.
        return np.zeros_like(centered)
    return centered / spread

In [3]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid of ``z``; a scalar input gives a scalar result.

    Notes
    -----
    Evaluating ``exp(-z)`` directly overflows for large negative ``z``.
    Here only ``exp(-|z|)`` is computed, and the algebraically equivalent
    form ``exp(z) / (1 + exp(z))`` is used for negative inputs.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in [0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + exp(-z))
    # z <  0:  exp(z) / (1 + exp(z))   (same function, rearranged)
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple turns a 0-d result back into a scalar and
    # leaves arrays of higher dimension as they are.
    return result[()]

In [4]:
def predict(X, W, b):
    """Return ``sigmoid(np.dot(X, W) + b)``.

    Parameters
    ----------
    X : array_like
        Inputs passed as the left operand of ``np.dot``.
    W : array_like
        Weights passed as the right operand of ``np.dot``.
    b : scalar or array_like
        Bias added to the product before the sigmoid is applied.

    Returns
    -------
    The sigmoid of the affine score, in whatever shape ``np.dot(X, W) + b``
    produces.
    """
    affine_score = np.dot(X, W) + b
    return sigmoid(affine_score)

In [5]:
def cost(y, y_hat):
    """Mean binary cross-entropy between targets ``y`` and predictions ``y_hat``.

    ``sys.float_info.min`` is added inside both logarithms so that a
    prediction of exactly 0 or 1 does not evaluate ``log(0)``.

    Parameters
    ----------
    y : numpy.ndarray
        Target values.
    y_hat : numpy.ndarray
        Predicted values, element-wise aligned with ``y``.

    Returns
    -------
    The mean over all elements of
    ``-y * log(y_hat) - (1 - y) * log(1 - y_hat)`` (with the offset above).
    """
    offset = sys.float_info.min
    # Contribution of the y-weighted term.
    positive_part = -y * np.log(y_hat + offset)
    # Contribution of the (1 - y)-weighted term.
    negative_part = (1 - y) * np.log(1 - y_hat + offset)
    return np.mean(positive_part - negative_part)

In [6]:
def train(X, y, learning_rate, epochs, seed=None):
    """Fit weights and a bias by batch gradient descent on the cross-entropy cost.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix; one weight is learned per column (``X.shape[1]``).
    y : numpy.ndarray
        Targets, aligned with the rows of ``X``.
    learning_rate : float
        Step size applied to both gradients.
    epochs : int
        Number of full-batch update steps.
    seed : int or None, optional
        Seed for the random initialisation of ``W`` and ``b``. ``None``
        (the default) draws from NumPy's global random state, exactly as
        before this parameter existed.

    Returns
    -------
    W : numpy.ndarray
        Learned weights, shape ``(X.shape[1],)``.
    b : float
        Learned bias.
    costs : numpy.ndarray
        Cost recorded every 100th epoch (epochs 0, 100, 200, ...).
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    # W is drawn before b, so a run seeded through np.random.seed() keeps
    # producing the same initial parameters.
    W = rng.random_sample(X.shape[1])
    b = rng.random_sample()
    # Collected in a list: np.append would copy the whole history each time.
    cost_history = []
    for i in range(epochs):
        y_hat = predict(X, W, b)
        residual = y_hat - y
        dW = np.dot(X.T, residual) / len(X)
        db = np.mean(residual)
        W -= learning_rate * dW
        b -= learning_rate * db
        if i % 100 == 0:
            # y_hat was computed before this epoch's update, so the value
            # logged is the cost at the start of epoch i.
            c = cost(y, y_hat)
            cost_history.append(c)
            print(f"Cost at epoch {i}: {c}")
    return W, b, np.array(cost_history, dtype=float)

In [7]:
def test(X, y, W, b):
    """Score the parameters ``W`` and ``b`` on the data ``(X, y)``.

    The outputs of ``predict`` are rounded with ``np.round`` and compared
    with ``y`` through ``accuracy``.

    Returns
    -------
    The value returned by ``accuracy(y, rounded_predictions)``.
    """
    rounded_predictions = np.round(predict(X, W, b))
    return accuracy(y, rounded_predictions)

In [8]:
def k_fold_cross_validation(X, y, k, lr, epochs):
    """Estimate accuracy with k-fold cross-validation.

    The data are cut into ``k`` contiguous folds in the order given; each
    fold is held out once while a fresh model is trained on the remaining
    rows. No shuffling happens here, so data that are sorted (for example
    grouped by label) should be shuffled by the caller first.

    Parameters
    ----------
    X : array_like
        Features, one row per sample.
    y : array_like
        Targets, aligned with the rows of ``X``.
    k : int
        Number of folds; must satisfy ``2 <= k <= len(X)``.
    lr : float
        Learning rate forwarded to ``train``.
    epochs : int
        Number of epochs forwarded to ``train``.

    Returns
    -------
    mean_accuracy : float
        Mean of the per-fold accuracies.
    costs : numpy.ndarray
        Cost history returned by ``train`` for the last fold only.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 2 or larger than the number of samples.
    """
    n_samples = len(X)
    if not 2 <= k <= n_samples:
        raise ValueError(
            f"k must be between 2 and the number of samples ({n_samples}), got {k}"
        )
    # When n_samples is not a multiple of k, the first `extra` folds take one
    # additional sample so that every sample is held out exactly once.
    base_size, extra = divmod(n_samples, k)
    accuracies = []
    start = 0
    for i in range(k):
        stop = start + base_size + (1 if i < extra else 0)
        # Divide the data into training and testing sets
        X_train = np.concatenate([X[:start], X[stop:]])
        Y_train = np.concatenate([y[:start], y[stop:]])
        X_test = X[start:stop]
        Y_test = y[start:stop]
        # Train the model
        print(f"Fold {i + 1} of {k}:")
        w, b, c = train(X_train, Y_train, lr, epochs)
        # Test the model
        print("Testing...")
        accuracies.append(test(X_test, Y_test, w, b))
        start = stop
    return np.mean(accuracies), c

In [9]:
def accuracy(y, y_hat):
    """Fraction of positions at which ``y`` and ``y_hat`` are equal.

    Parameters
    ----------
    y : array_like
        Reference values.
    y_hat : array_like
        Predicted values, compared element-wise with ``y``.

    Returns
    -------
    numpy.floating
        Mean of the element-wise equality, between 0.0 and 1.0.
    """
    # Coerce to arrays first: `==` between two plain Python lists yields a
    # single bool, which would collapse the result to 0.0 or 1.0.
    y = np.asarray(y)
    y_hat = np.asarray(y_hat)
    return np.mean(y == y_hat)

In [10]:
# Load MNIST through keras.datasets. The cells below index trainSet[0] as the
# images and trainSet[1] as the labels; testSet is not referenced in this
# part of the notebook.
trainSet, testSet = mnist.load_data()

In [11]:
# Flatten every image into one feature row; float pixels, integer labels.
xTrain = trainSet[0].astype('float32')
yTrain = trainSet[1].astype('int32')
xTrain = xTrain.reshape(xTrain.shape[0], -1)

# Binary problem: keep only the samples labelled 0 or 1.
keep = (yTrain == 0) | (yTrain == 1)
xTrain = xTrain[keep]
yTrain = yTrain[keep]

# k_fold_cross_validation holds out contiguous slices, so the rows must not
# be grouped by class (all 0s followed by all 1s would make most held-out
# folds single-class). Shuffle once with a fixed seed so that Restart & Run
# All reproduces the same folds.
order = np.random.default_rng(0).permutation(len(yTrain))
xTrain = standardize(xTrain[order])
yTrain = yTrain[order]

In [None]:
# 10-fold cross-validation with learning rate 0.01 and 1000 epochs per fold.
average_accuracy, c = k_fold_cross_validation(
    X=xTrain, y=yTrain, k=10, lr=0.01, epochs=1000
)
# Mean accuracy on the first line, then the cost history that
# k_fold_cross_validation returns (the one from its last fold).
print(average_accuracy, c, sep="\n")

Cost at epoch 0: 241.38995581829113
Cost at epoch 100: 14.49978947543174
Cost at epoch 200: 4.85079782629973
Cost at epoch 300: 3.1230496439639532
Cost at epoch 400: 2.2848084427158093
Cost at epoch 500: 2.0918444864745838
Cost at epoch 600: 1.594845039580795
Cost at epoch 700: 1.3579755145342982
Cost at epoch 800: 1.1958911373346708
Cost at epoch 900: 1.162807578881441
Cost at epoch 0: 234.12104202523554
Cost at epoch 100: 13.462075429803967
Cost at epoch 200: 5.712521780786398
Cost at epoch 300: 3.6492240819886863
Cost at epoch 400: 2.4000777223229797
Cost at epoch 500: 1.7246684573547368
Cost at epoch 600: 1.5755453595109286
Cost at epoch 700: 1.2174808844465321
Cost at epoch 800: 1.170252479893867
Cost at epoch 900: 1.0757132596427257
Cost at epoch 0: 244.71870952205433
Cost at epoch 100: 13.476920922233576
Cost at epoch 200: 4.977631870793169
Cost at epoch 300: 2.9653106533082036
Cost at epoch 400: 2.1276714971600157
Cost at epoch 500: 1.639838057104583
Cost at epoch 600: 1.437679

In [None]:
# Graph the cost history `c` returned by the cross-validation cell.
# NOTE: `plt` is matplotlib.pyplot.plot itself (the notebook imports it with
# `from matplotlib.pyplot import plot as plt`), not the pyplot module, so
# this line is a direct call to plot().
plt(c)

In [None]:
# Scratch check: evaluate log(o + 1e-15) for o = 0, i.e. what a 1e-15 offset
# inside a logarithm yields for a zero input. (cost() uses
# sys.float_info.min as its offset, not 1e-15.)
o = 0
h = np.log(o + 1e-15)
h  # bare expression so the notebook displays the value