In [None]:
import numpy as np
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten


def load_images_from_folder(folder, label):
    """Load every entry of ``folder`` as a scaled 64x64 grayscale array.

    Args:
        folder: Directory whose entries are each opened with ``Image.open``.
        label: Value attached to every image loaded from this folder.

    Returns:
        A ``(images, labels)`` pair of equal-length lists: the pixel arrays
        divided by 255.0, and one copy of ``label`` per image.
    """
    pixel_arrays = []
    for entry in os.listdir(folder):
        with Image.open(os.path.join(folder, entry)) as source:
            # Resize first, then convert to single-channel grayscale ('L').
            grayscale = source.resize((64, 64)).convert('L')
            pixel_arrays.append(np.asarray(grayscale) / 255.0)
    return pixel_arrays, [label] * len(pixel_arrays)


def relu(z):
    """Apply the rectified linear unit element-wise: ``max(0, z)``."""
    rectified = np.maximum(0, z)
    return rectified


def relu_derivative(z):
    """Return the ReLU slope mask: true where ``z`` is strictly positive."""
    is_active = z > 0
    return is_active


def sigmoid(z):
    """Compute the logistic function ``1 / (1 + e^-z)`` element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator


def sigmoid_derivative(z):
    """Return the derivative of the logistic function evaluated at ``z``.

    Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))``.
    """
    activation = sigmoid(z)
    complement = 1 - activation
    return activation * complement


def initialize_weights(layer_dims):
    """Create the initial parameters of a fully connected network.

    Weight matrices are drawn with He scaling (standard normal times
    ``sqrt(2 / fan_in)``). A fixed small factor such as 0.01 shrinks the
    signal at every ReLU layer, so in a network several layers deep the
    gradients reaching the early layers are vanishingly small and training
    stalls at the cost of always predicting the class prior.

    Args:
        layer_dims: Sequence of layer widths, input size first and output
            size last.

    Returns:
        Dict with, for each layer ``l`` in ``1..len(layer_dims) - 1``,
        ``'W<l>'`` of shape ``(layer_dims[l - 1], layer_dims[l])`` and
        ``'b<l>'`` of shape ``(1, layer_dims[l])`` filled with zeros.
    """
    # Reseeds NumPy's global RNG so every call yields the same parameters.
    np.random.seed(3)
    weights = {}
    for layer in range(1, len(layer_dims)):
        fan_in = layer_dims[layer - 1]
        fan_out = layer_dims[layer]
        # He scaling keeps the activation variance roughly constant across
        # ReLU layers instead of decaying with depth.
        he_scale = np.sqrt(2.0 / fan_in)
        weights['W' + str(layer)] = np.random.randn(fan_in, fan_out) * he_scale
        weights['b' + str(layer)] = np.zeros((1, fan_out))
    return weights


def forward_propagation(X, weights):
    """Run a forward pass: ReLU on every hidden layer, sigmoid on the last.

    Args:
        X: Input batch; it is right-multiplied by ``weights['W1']``.
        weights: Dict holding ``'W<l>'`` and ``'b<l>'`` for each layer; the
            layer count is taken as ``len(weights) // 2``.

    Returns:
        ``(AL, caches)`` where ``AL`` is the sigmoid output of the final
        layer and ``caches`` holds one ``(A_prev, W, b, Z)`` tuple per layer,
        in layer order.
    """
    depth = len(weights) // 2
    caches = []

    def affine(layer_input, index):
        # Linear step of one layer; records what backprop needs later.
        W = weights['W' + str(index)]
        b = weights['b' + str(index)]
        Z = np.dot(layer_input, W) + b
        caches.append((layer_input, W, b, Z))
        return Z

    activation = X
    for index in range(1, depth):
        activation = relu(affine(activation, index))
    AL = sigmoid(affine(activation, depth))
    return AL, caches


def compute_cost(AL, Y):
    """Return the mean binary cross-entropy between predictions and labels.

    Args:
        AL: Predicted probabilities; the first axis indexes the samples.
        Y: Ground-truth 0/1 labels with the same number of elements as
            ``AL``. Any shape with that many elements is accepted.

    Returns:
        The cost averaged over the first axis of ``AL``, as a scalar.

    Raises:
        ValueError: If ``Y`` cannot be reshaped to ``AL.shape``.
    """
    AL = np.asarray(AL, dtype=float)
    # Align the labels with the predictions. Without this a (m,) label vector
    # against a (m, 1) prediction column broadcasts to an (m, m) matrix and
    # the summed cost comes out roughly m times too large.
    Y = np.asarray(Y).reshape(AL.shape)
    m = AL.shape[0]
    # Keep probabilities strictly inside (0, 1) so a saturated sigmoid does
    # not produce log(0) = -inf (or nan once multiplied by a zero label).
    eps = 1e-15
    AL = np.clip(AL, eps, 1 - eps)
    cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
    return np.squeeze(cost)


def backward_propagation(AL, Y, caches):
    """Compute cross-entropy gradients for every layer's parameters.

    Assumes ``AL`` is the sigmoid of the last cached ``Z`` and that all
    earlier layers used ReLU, as produced by ``forward_propagation``.

    Args:
        AL: Output-layer activations; the first axis indexes the samples.
        Y: Ground-truth labels as an array reshapeable to ``AL.shape``.
        caches: One ``(A_prev, W, b, Z)`` tuple per layer, in layer order.

    Returns:
        Dict with ``'dW<l>'`` and ``'db<l>'`` for each layer ``l`` in
        ``1..len(caches)``, each averaged over the samples.
    """
    grads = {}
    num_layers = len(caches)
    m = AL.shape[0]
    Y = Y.reshape(AL.shape)

    # Output layer. For a sigmoid output with cross-entropy loss,
    # dL/dZ = dAL * sigmoid'(Z) simplifies exactly to AL - Y. Using the
    # simplified form avoids dividing by AL and (1 - AL), which yields
    # inf/nan as soon as a prediction saturates at 0 or 1.
    A_prev, W, _, _ = caches[-1]
    dZ = AL - Y
    grads["dW" + str(num_layers)] = np.dot(A_prev.T, dZ) / m
    grads["db" + str(num_layers)] = np.sum(dZ, axis=0, keepdims=True) / m
    dA_prev = np.dot(dZ, W.T)

    # Hidden layers, last to first.
    for index in reversed(range(num_layers - 1)):
        A_prev, W, _, Z = caches[index]
        dZ = dA_prev * relu_derivative(Z)
        grads["dW" + str(index + 1)] = np.dot(A_prev.T, dZ) / m
        grads["db" + str(index + 1)] = np.sum(dZ, axis=0, keepdims=True) / m
        if index > 0:
            # The gradient w.r.t. the network input is never needed.
            dA_prev = np.dot(dZ, W.T)
    return grads


def update_parameters(weights, grads, learning_rate):
    """Apply one gradient-descent step to every layer's parameters in place.

    Args:
        weights: Dict of ``'W<l>'`` / ``'b<l>'`` arrays; modified in place.
        grads: Dict of matching ``'dW<l>'`` / ``'db<l>'`` gradients.
        learning_rate: Step size multiplying each gradient.

    Returns:
        The same ``weights`` dict that was passed in.
    """
    num_layers = len(weights) // 2
    for index in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            weights[f"{prefix}{index}"] -= learning_rate * grads[f"d{prefix}{index}"]
    return weights


def model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=1000, print_cost=False):
    """Train the network with full-batch gradient descent.

    Args:
        X: Training inputs passed to ``forward_propagation``.
        Y: Training labels passed to the cost and backward pass.
        layers_dims: Layer widths handed to ``initialize_weights``.
        learning_rate: Step size for each parameter update.
        num_iterations: Number of gradient-descent iterations to run.
        print_cost: When true, print the cost every 100 iterations.

    Returns:
        The trained weights dict.
    """
    np.random.seed(1)
    weights = initialize_weights(layers_dims)
    # Filled only on the iterations that are printed; kept local, not returned.
    cost_log = []
    for step in range(num_iterations):
        AL, caches = forward_propagation(X, weights)
        cost = compute_cost(AL, Y)
        grads = backward_propagation(AL, Y, caches)
        weights = update_parameters(weights, grads, learning_rate)
        if not (print_cost and step % 100 == 0):
            continue
        print("Cost after iteration %i: %f" % (step, cost))
        cost_log.append(cost)
    return weights


# Load the dataset
(X_mnist, y_mnist), (X_test_mnist, y_test_mnist) = mnist.load_data()

# Filter out only the images for digits 0 and 1. The held-out test split gets
# the same filter: the network is a binary 0-vs-1 classifier, so scoring it
# against digits 2-9 would make the reported accuracy meaningless.
filter_indices_mnist = (y_mnist == 0) | (y_mnist == 1)
X_mnist, y_mnist = X_mnist[filter_indices_mnist], y_mnist[filter_indices_mnist]
filter_indices_test_mnist = (y_test_mnist == 0) | (y_test_mnist == 1)
X_test_mnist = X_test_mnist[filter_indices_test_mnist]
y_test_mnist = y_test_mnist[filter_indices_test_mnist]

# Normalize the images from 0-255 to 0-1
X_mnist = X_mnist / 255.0
X_test_mnist = X_test_mnist / 255.0

# Flatten the images for the MLP (multilayer perceptron)
X_mnist = X_mnist.reshape((-1, 28 * 28))
X_test_mnist = X_test_mnist.reshape((-1, 28 * 28))

# Split the dataset into training and validation sets
X_train_mnist, X_val_mnist, y_train_mnist, y_val_mnist = train_test_split(X_mnist, y_mnist, test_size=0.2, random_state=32)

# The network emits one column of probabilities, shape (m, 1). Pass the labels
# in that same shape so element-wise cost terms pair each label with its own
# prediction instead of broadcasting a (m,) vector into an (m, m) matrix.
y_train_mnist_col = y_train_mnist.reshape(-1, 1)

# Define layer dimensions for the model
layer_dims = [X_train_mnist.shape[1], 10, 8, 8, 4, 1]

# Train the model
trained_weights_mnist = model(X_train_mnist, y_train_mnist_col, layer_dims, learning_rate=0.0075,
                               num_iterations=2500, print_cost=True)

# Evaluate on the MNIST test set (digits 0 and 1 only)
AL_test_mnist, _ = forward_propagation(X_test_mnist, trained_weights_mnist)
# Threshold at 0.5 and flatten to a 1-D integer vector matching y_test_mnist.
predictions_mnist = (AL_test_mnist > 0.5).astype(int).ravel()

accuracy_mnist = accuracy_score(y_test_mnist, predictions_mnist)
print(f'MNIST Test Accuracy: {accuracy_mnist * 100:.2f}%')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Cost after iteration 0: 7022.967233
Cost after iteration 100: 7016.033435
Cost after iteration 200: 7011.269672
Cost after iteration 300: 7007.996450
Cost after iteration 400: 7005.747074
Cost after iteration 500: 7004.201055
Cost after iteration 600: 7003.138301
Cost after iteration 700: 7002.407647
Cost after iteration 800: 7001.905251
Cost after iteration 900: 7001.559764
Cost after iteration 1000: 7001.322158
Cost after iteration 1100: 7001.158731
Cost after iteration 1200: 7001.046316
Cost after iteration 1300: 7000.968987
Cost after iteration 1400: 7000.915789
Cost after iteration 1500: 7000.879190
Cost after iteration 1600: 7000.854011
Cost after iteration 1700: 7000.836687
Cost after iteration 1800: 7000.824767
Cost after iteration 1900: 7000.816566
Cost after iteration 2000: 7000.810923
Cost after iteration 2100: 7000.807040
Cost after iteration 2200: 7000.804369
Cost after iteration 23