In [1]:
# `display` and `HTML` are part of the public `IPython.display` API; importing
# them from `IPython.core.display` is deprecated and emits a warning.
from IPython.display import display, HTML

# Inject a CSS override that sets every element with class `container` to full
# width (presumably the classic Notebook page wrapper - confirm for other UIs).
display(HTML("<style>.container { width:100% !important; }</style>"))

  from IPython.core.display import display, HTML


In [2]:
import numpy as np
from keras.datasets import fashion_mnist
import keras
import matplotlib.pyplot as plt

# `fashion_mnist` is already bound by the `from keras.datasets import fashion_mnist`
# import above, so it is used directly instead of being re-assigned.
# load_data() returns two (images, labels) pairs: the training and the test split.
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

2024-02-18 13:26:22.978481: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-18 13:26:22.978519: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-18 13:26:22.979450: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-18 13:26:22.986575: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    floor = 0
    return np.maximum(floor, Z)

def relu_derivative(Z):
    """Return the ReLU slope mask: True where ``Z`` is strictly positive.

    The result is boolean; it acts as 1/0 when multiplied with a numeric
    gradient.
    """
    positive_mask = Z > 0
    return positive_mask

def sigmoid(Z):
    """Logistic function ``1 / (1 + exp(-Z))``, applied element-wise."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

def sigmoid_derivative(A):
    """Return ``A * (1 - A)`` element-wise.

    This equals the derivative of the logistic function only when ``A`` is the
    sigmoid *output* (the activation), not the pre-activation value.
    """
    complement = 1 - A
    return A * complement

def tanh(Z):
    """Hyperbolic tangent of ``Z``, applied element-wise (delegates to NumPy)."""
    activated = np.tanh(Z)
    return activated

def tanh_derivative(Z):
    """Derivative of tanh evaluated at the pre-activation ``Z``: ``1 - tanh(Z)**2``."""
    t = np.tanh(Z)
    return 1 - t ** 2

In [4]:
class NeuralNetwork:
    """Fully connected feed-forward classifier trained by gradient descent.

    Every hidden layer uses the same activation ('relu', 'sigmoid' or 'tanh');
    the last layer is a softmax paired with a cross-entropy loss. Samples are
    stored column-wise: an input batch is (layer_sizes[0], m) and the one-hot
    targets are (layer_sizes[-1], m) for m samples.

    Attributes:
        layer_sizes: unit count per layer, input layer first.
        activation_name: name of the hidden-layer activation.
        activation / activation_derivative: callables selected for that name.
        parameters: dict with 'W<l>' (weights) and 'b<l>' (biases), l = 1..L.
    """

    def __init__(self, layer_sizes, activation='relu'):
        """Store the architecture, pick the activation and create the parameters.

        Raises:
            ValueError: if ``activation`` is not a supported name.
        """
        self.layer_sizes = layer_sizes
        self.activation_name = activation
        self.activation, self.activation_derivative = self.set_activation_functions(activation)
        self.parameters = self.initialize_parameters()

    def set_activation_functions(self, activation):
        """Return the ``(function, derivative)`` pair registered for ``activation``.

        Raises:
            ValueError: for any name other than 'relu', 'sigmoid' or 'tanh'.
        """
        if activation == 'relu':
            return relu, relu_derivative
        elif activation == 'sigmoid':
            return sigmoid, sigmoid_derivative
        elif activation == 'tanh':
            return tanh, tanh_derivative
        else:
            raise ValueError("Unsupported activation function")

    def initialize_parameters(self):
        """Create weights drawn from N(0, 1) scaled by 0.01 and zero biases.

        Returns:
            dict mapping 'W<l>' to an array of shape
            (layer_sizes[l], layer_sizes[l-1]) and 'b<l>' to a zero column
            vector of shape (layer_sizes[l], 1).
        """
        parameters = {}
        for l in range(1, len(self.layer_sizes)):
            parameters['W' + str(l)] = np.random.randn(self.layer_sizes[l], self.layer_sizes[l-1]) * 0.01
            parameters['b' + str(l)] = np.zeros((self.layer_sizes[l], 1))
        return parameters

    def softmax(self, Z):
        """Column-wise softmax of ``Z`` (each column is normalised to sum to 1).

        Each column is shifted by its own maximum before exponentiation. The
        shift does not change the result mathematically, but it guarantees that
        at least one entry per column is exp(0) = 1, so the column sum can
        never underflow to zero and produce NaN.
        """
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        expZ = np.exp(shifted)
        return expZ / expZ.sum(axis=0, keepdims=True)

    def compute_loss(self, Y, Y_hat):
        """Mean cross-entropy between one-hot targets ``Y`` and predictions ``Y_hat``.

        The sum is divided by the number of columns of ``Y``. A small constant
        (1e-9) is added inside the logarithm to avoid ``log(0)``.
        """
        m = Y.shape[1]
        loss = -np.sum(Y * np.log(Y_hat + 1e-9)) / m
        return loss

    def forward_propagation(self, X):
        """Run a forward pass over the batch ``X``.

        Returns:
            (AL, caches): the softmax output of the last layer and a dict
            holding the pre-activation 'Z<l>' and activation 'A<l>' of every
            layer l = 1..L.
        """
        caches = {}
        A = X
        L = len(self.parameters) // 2  # parameters holds one W and one b per layer

        # Hidden layers: affine transform followed by the chosen activation.
        for l in range(1, L):
            A_prev = A
            Z = np.dot(self.parameters['W' + str(l)], A_prev) + self.parameters['b' + str(l)]
            A = self.activation(Z)
            caches['Z' + str(l)] = Z
            caches['A' + str(l)] = A

        # Output layer: affine transform followed by softmax.
        ZL = np.dot(self.parameters['W' + str(L)], A) + self.parameters['b' + str(L)]
        AL = self.softmax(ZL)
        caches['Z' + str(L)] = ZL
        caches['A' + str(L)] = AL
        return AL, caches

    def _hidden_local_gradient(self, caches, l):
        """Return the activation derivative of hidden layer ``l``.

        ``sigmoid_derivative`` is written in terms of the activation output
        (``A * (1 - A)``), while the relu and tanh derivatives take the
        pre-activation ``Z``; each one is handed the cached array it expects.
        """
        if self.activation_name == 'sigmoid':
            return self.activation_derivative(caches['A' + str(l)])
        return self.activation_derivative(caches['Z' + str(l)])

    def backpropagation(self, X, Y, caches):
        """Compute the gradients of the mean cross-entropy loss.

        Args:
            X: input batch that produced ``caches``; samples are columns.
            Y: one-hot targets; reshaped to the shape of the network output.
            caches: dict returned by ``forward_propagation`` for ``X``.

        Returns:
            dict with 'dW<l>' and 'db<l>' for every layer l = 1..L.
        """
        grads = {}
        L = len(self.parameters) // 2  # number of layers
        m = X.shape[1]
        Y = Y.reshape(caches['A' + str(L)].shape)  # ensure same shape as output layer

        # Softmax combined with cross-entropy gives dZ = A - Y at the output.
        dZ = caches['A' + str(L)] - Y

        for l in range(L, 0, -1):
            # The input to layer 1 is X itself; it is never stored in caches,
            # which also makes a network without hidden layers work.
            A_prev = caches['A' + str(l - 1)] if l > 1 else X
            grads["dW" + str(l)] = 1./m * np.dot(dZ, A_prev.T)
            grads["db" + str(l)] = 1./m * np.sum(dZ, axis=1, keepdims=True)

            if l > 1:
                # Push the error one layer down: through the weights, then
                # element-wise through the hidden activation.
                dA_prev = np.dot(self.parameters["W" + str(l)].T, dZ)
                dZ = self._hidden_local_gradient(caches, l - 1) * dA_prev

        return grads

    def update_parameters(self, grads, learning_rate):
        """Apply one gradient-descent step to every weight and bias, in place."""
        L = len(self.parameters) // 2
        for l in range(1, L + 1):
            self.parameters["W" + str(l)] -= learning_rate * grads["dW" + str(l)]
            self.parameters["b" + str(l)] -= learning_rate * grads["db" + str(l)]

In [5]:
def convert_labels_to_one_hot(labels, classes):
    """One-hot encode integer ``labels`` into a (classes, n_labels) array.

    Row ``k`` of the identity matrix is the encoding of label ``k``; the
    selected rows are transposed so that every column is one encoded label.
    """
    identity = np.eye(classes)
    encoded_rows = identity[labels]
    return encoded_rows.T

def preprocess_data(train_images, train_labels, test_images, test_labels, num_classes=10):
    """Flatten and scale the images and one-hot encode the labels.

    Args:
        train_images, test_images: arrays whose first axis indexes samples;
            all remaining axes are flattened into one feature axis.
        train_labels, test_labels: integer class labels, one per sample.
        num_classes: number of classes used for the one-hot encoding
            (defaults to 10, the value that was previously hard-coded).

    Returns:
        (X_train, Y_train, X_test, Y_test), where the X arrays have shape
        (n_features, n_samples) and the Y arrays have shape
        (num_classes, n_samples).
    """
    def _flatten_and_scale(images):
        # One row per sample, transposed so samples become columns, then
        # divided by 255 (assumes 8-bit pixel intensities - TODO confirm).
        return images.reshape(images.shape[0], -1).T / 255.

    X_train = _flatten_and_scale(train_images)
    X_test = _flatten_and_scale(test_images)

    Y_train = convert_labels_to_one_hot(train_labels, num_classes)
    Y_test = convert_labels_to_one_hot(test_labels, num_classes)

    return X_train, Y_train, X_test, Y_test

def plot_training_loss_and_test_acc(epochs, traing_loss, test_accuracy):
    """Draw training loss and test accuracy against the epoch index.

    Both curves share one set of axes; the x values are 0 .. epochs - 1, so
    each series is expected to hold one value per epoch.
    """
    x_values = [epoch_index for epoch_index in range(epochs)]
    curves = (
        (traing_loss, 'Training Loss'),
        (test_accuracy, 'Test Accuracy'),
    )

    plt.figure(figsize=(10, 5))
    for series, legend_label in curves:
        plt.plot(x_values, series, label=legend_label)
    plt.title('Training Loss and Test Accuracy over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss/Accuracy')
    plt.legend()
    plt.show()

In [6]:
def train(X_train, Y_train, X_test, Y_test, epochs=10, learning_rate=0.01, activation='relu'):
    """Train a one-hidden-layer network with full-batch gradient descent.

    After every epoch the training loss and the accuracy on the whole test
    set are printed.

    Args:
        X_train, X_test: feature matrices with samples as columns.
        Y_train, Y_test: one-hot targets with samples as columns.
        epochs: number of full passes over the training data.
        learning_rate: step size of each gradient-descent update.
        activation: hidden-layer activation name passed to NeuralNetwork.

    Returns:
        The trained NeuralNetwork, so it can be reused after training instead
        of being discarded.
    """
    np.random.seed(1)  # fixed seed: the random weight initialisation is repeatable

    # 64 hidden units; the output width follows the one-hot targets
    # (10 rows for the labels produced by preprocess_data's default).
    nn = NeuralNetwork([X_train.shape[0], 64, Y_train.shape[0]], activation)

    # The true test classes do not change between epochs; decode them once.
    test_classes = np.argmax(Y_test, axis=0)

    for epoch in range(epochs):
        AL, caches = nn.forward_propagation(X_train)
        loss = nn.compute_loss(Y_train, AL)  # loss before this epoch's update
        grads = nn.backpropagation(X_train, Y_train, caches)
        nn.update_parameters(grads, learning_rate)

        print("Epoch %i, Training loss: %f" % (epoch, loss))

        # Evaluate the updated model on the whole test set.
        predictions, _ = nn.forward_propagation(X_test)
        accuracy = np.mean(np.argmax(predictions, axis=0) == test_classes)
        print(f"Test accuracy : {accuracy}\n")

    return nn

# Turn the raw image/label arrays into column-wise feature matrices and
# one-hot targets.
X_train, Y_train, X_test, Y_test = preprocess_data(
    train_images,
    train_labels,
    test_images,
    test_labels,
)

# 100 epochs of full-batch gradient descent with a tanh hidden layer.
train(
    X_train,
    Y_train,
    X_test,
    Y_test,
    epochs=100,
    learning_rate=0.1,
    activation='tanh',
)

Epoch 0, Training loss: 2.303152
Test accuracy : 0.1867

Epoch 1, Training loss: 2.299979
Test accuracy : 0.2292

Epoch 2, Training loss: 2.296831
Test accuracy : 0.2615

Epoch 3, Training loss: 2.293613
Test accuracy : 0.2723

Epoch 4, Training loss: 2.290235
Test accuracy : 0.2728

Epoch 5, Training loss: 2.286611
Test accuracy : 0.2728

Epoch 6, Training loss: 2.282653
Test accuracy : 0.2684

Epoch 7, Training loss: 2.278273
Test accuracy : 0.2653

Epoch 8, Training loss: 2.273381
Test accuracy : 0.2661

Epoch 9, Training loss: 2.267882
Test accuracy : 0.2715

Epoch 10, Training loss: 2.261677
Test accuracy : 0.2764

Epoch 11, Training loss: 2.254663
Test accuracy : 0.2837

Epoch 12, Training loss: 2.246732
Test accuracy : 0.2919

Epoch 13, Training loss: 2.237769
Test accuracy : 0.3027

Epoch 14, Training loss: 2.227656
Test accuracy : 0.3159

Epoch 15, Training loss: 2.216270
Test accuracy : 0.3305

Epoch 16, Training loss: 2.203492
Test accuracy : 0.3491

Epoch 17, Training loss: