In [1]:
# Import from the public IPython.display module: the IPython.core.display
# location used previously is deprecated and triggers a warning on import.
from IPython.display import display, HTML

# Inject a CSS rule that stretches the notebook's `.container` element to the
# full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

  from IPython.core.display import display, HTML


In [2]:
import numpy as np
from keras.datasets import fashion_mnist
import keras
import matplotlib.pyplot as plt

# Load the Fashion-MNIST splits; load_data() yields a (train, test) pair, each
# of which is itself an (images, labels) pair.
fashion_mnist = keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

2024-02-13 10:30:13.844533: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-13 10:30:13.844574: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-13 10:30:13.845597: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-13 10:30:13.851699: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
class NeuralNetwork:
    """Fully connected feed-forward classifier trained by gradient descent.

    Every layer except the last applies ReLU; the last layer applies softmax.
    Data is laid out column-wise: inputs, activations and one-hot targets all
    have one column per example. Parameters are stored in ``self.parameters``
    under the keys ``'W1', 'b1', ..., 'WL', 'bL'``.
    """

    def __init__(self, layer_sizes, weight_scale=0.01):
        """Create the network and draw its initial parameters.

        Args:
            layer_sizes: sequence ``[n_inputs, n_hidden_1, ..., n_outputs]``;
                needs at least an input and an output size.
            weight_scale: standard deviation of the Gaussian weight
                initialisation (default 0.01). Pass ``None`` to use He
                initialisation, ``sqrt(2 / fan_in)`` per layer.

        Raises:
            ValueError: if fewer than two layer sizes are given.
        """
        if len(layer_sizes) < 2:
            raise ValueError("layer_sizes needs at least an input and an output size")
        self.layer_sizes = layer_sizes
        self.weight_scale = weight_scale
        self.parameters = self.initialize_parameters()

    def initialize_parameters(self):
        """Return a dict with Gaussian weights ``W{l}`` and zero biases ``b{l}``.

        ``W{l}`` has shape ``(layer_sizes[l], layer_sizes[l-1])`` and ``b{l}``
        has shape ``(layer_sizes[l], 1)``. Values are drawn from NumPy's global
        random state, so seeding ``np.random`` beforehand makes them repeatable.
        """
        # getattr keeps this usable on instances created without __init__.
        scale = getattr(self, 'weight_scale', 0.01)
        parameters = {}
        for l in range(1, len(self.layer_sizes)):
            fan_in, fan_out = self.layer_sizes[l - 1], self.layer_sizes[l]
            std = np.sqrt(2.0 / fan_in) if scale is None else scale
            parameters['W' + str(l)] = np.random.randn(fan_out, fan_in) * std
            parameters['b' + str(l)] = np.zeros((fan_out, 1))
        return parameters

    def relu(self, Z):
        """Element-wise ``max(0, Z)``."""
        return np.maximum(0, Z)

    def softmax(self, Z):
        """Column-wise softmax of ``Z`` (each column sums to 1).

        The maximum is subtracted per column, not globally: a global shift can
        push every entry of a column far below zero, so all its exponentials
        underflow to 0 and the normalisation becomes 0/0.
        """
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        expZ = np.exp(shifted)
        return expZ / expZ.sum(axis=0, keepdims=True)

    def compute_loss(self, Y, Y_hat):
        """Mean cross-entropy between one-hot ``Y`` and probabilities ``Y_hat``.

        The sum is divided by ``Y.shape[1]`` (the number of columns). ``1e-9``
        is added inside the logarithm so a zero probability does not give
        ``log(0)``.
        """
        m = Y.shape[1]
        loss = -np.sum(Y * np.log(Y_hat + 1e-9)) / m
        return loss

    def forward_propagation(self, X):
        """Run ``X`` through the network.

        Returns:
            ``(AL, caches)`` where ``AL`` is the softmax output of the last
            layer and ``caches`` maps ``'Z{l}'`` / ``'A{l}'`` to the
            pre-activation / activation of every layer ``l = 1..L``.
        """
        caches = {}
        A = X
        L = len(self.parameters) // 2  # two entries (W, b) per layer

        # Hidden layers: affine transform followed by ReLU.
        for l in range(1, L):
            Z = np.dot(self.parameters['W' + str(l)], A) + self.parameters['b' + str(l)]
            A = self.relu(Z)
            caches['Z' + str(l)] = Z
            caches['A' + str(l)] = A

        # Output layer: affine transform followed by softmax.
        ZL = np.dot(self.parameters['W' + str(L)], A) + self.parameters['b' + str(L)]
        AL = self.softmax(ZL)
        caches['Z' + str(L)] = ZL
        caches['A' + str(L)] = AL
        return AL, caches

    def backpropagation(self, X, Y, caches):
        """Compute gradients of the mean cross-entropy loss.

        Args:
            X: the inputs that produced ``caches``; one column per example.
            Y: one-hot targets; reshaped to the shape of the output activation.
            caches: the dict returned by ``forward_propagation``.

        Returns:
            Dict mapping ``'dW{l}'`` / ``'db{l}'`` to the gradient for every
            layer ``l = 1..L``.
        """
        grads = {}
        L = len(self.parameters) // 2  # Number of layers
        m = X.shape[1]
        Y = Y.reshape(caches['A' + str(L)].shape)  # Ensure same shape as output layer

        # Softmax combined with cross-entropy gives dZ = A - Y at the output.
        dZ = caches['A' + str(L)] - Y
        for l in range(L, 0, -1):
            # The activation feeding layer 1 is the input itself (there is no 'A0').
            A_prev = caches['A' + str(l - 1)] if l > 1 else X
            grads["dW" + str(l)] = 1. / m * np.dot(dZ, A_prev.T)
            grads["db" + str(l)] = 1. / m * np.sum(dZ, axis=1, keepdims=True)

            if l > 1:
                # Push the error through W_l, then through the ReLU of layer
                # l-1. The masked value must be what flows further down;
                # passing the unmasked dA on would corrupt every gradient
                # below the top hidden layer.
                dZ = np.dot(self.parameters["W" + str(l)].T, dZ)
                dZ[caches['Z' + str(l - 1)] <= 0] = 0

        return grads

    def update_parameters(self, grads, learning_rate):
        """Apply one in-place gradient-descent step to every ``W`` and ``b``."""
        L = len(self.parameters) // 2
        for l in range(1, L + 1):
            self.parameters["W" + str(l)] -= learning_rate * grads["dW" + str(l)]
            self.parameters["b" + str(l)] -= learning_rate * grads["db" + str(l)]

In [7]:
def convert_labels_to_one_hot(labels, classes):
    """One-hot encode integer ``labels``.

    Each label selects a row of the ``classes x classes`` identity matrix; the
    selection is then transposed, so a 1-D ``labels`` array yields an array
    with ``classes`` rows and one column per label.
    """
    identity = np.eye(classes)
    rows_per_label = identity[labels]
    return rows_per_label.T

def preprocess_data(train_images, train_labels, test_images, test_labels):
    """Turn raw image/label arrays into column-wise network inputs and targets.

    Each image array is flattened to one row per example (its first axis),
    transposed so that examples become columns, and divided by 255. Labels are
    one-hot encoded with 10 classes.

    Returns:
        ``(X_train, Y_train, X_test, Y_test)``.
    """
    def _as_scaled_columns(images):
        # (n_examples, ...) -> (n_features, n_examples), divided by 255.
        n_examples = images.shape[0]
        return images.reshape(n_examples, -1).T / 255.

    X_train = _as_scaled_columns(train_images)
    X_test = _as_scaled_columns(test_images)
    Y_train, Y_test = (
        convert_labels_to_one_hot(split_labels, 10)
        for split_labels in (train_labels, test_labels)
    )
    return X_train, Y_train, X_test, Y_test

def plot_training_loss_and_test_acc(epochs, traing_loss, test_accuracy):
    """Draw training loss and test accuracy against the epoch index on one figure.

    ``traing_loss`` and ``test_accuracy`` are each plotted against
    ``0 .. epochs - 1``.
    """
    x_values = list(range(epochs))
    curves = (
        (traing_loss, 'Training Loss'),
        (test_accuracy, 'Test Accuracy'),
    )

    plt.figure(figsize=(10, 5))
    for y_values, curve_label in curves:
        plt.plot(x_values, y_values, label=curve_label)
    plt.title('Training Loss and Test Accuracy over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss/Accuracy')
    plt.legend()
    plt.show()

In [19]:
def train(X_train, Y_train, X_test, Y_test, epochs=10, learning_rate=0.01):
    """Train a ``[n_features, 64, 10]`` NeuralNetwork with full-batch gradient descent.

    Each epoch does one forward and backward pass over all of ``X_train`` and
    one parameter update. It then prints the training loss measured before the
    update, followed by the accuracy of the updated network on ``X_test``.
    Nothing is returned.
    """
    np.random.seed(1)  # reseed so the random weight initialisation is repeatable
    network = NeuralNetwork([X_train.shape[0], 64, 10])

    # NOTE: collecting per-epoch loss/accuracy and plotting them with
    # plot_training_loss_and_test_acc is currently disabled.
    for epoch in range(epochs):
        train_probs, caches = network.forward_propagation(X_train)
        loss = network.compute_loss(Y_train, train_probs)
        gradients = network.backpropagation(X_train, Y_train, caches)
        network.update_parameters(gradients, learning_rate)

        print("Epoch %i, Training loss: %f" % (epoch, loss))

        # Score the updated weights on the complete test set.
        test_probs = network.forward_propagation(X_test)[0]
        predicted_classes = np.argmax(test_probs, axis=0)
        true_classes = np.argmax(Y_test, axis=0)
        accuracy = np.mean(predicted_classes == true_classes)
        print(f"Test accuracy : {accuracy}\n")

# Flatten and scale the images, one-hot encode the labels, then train for
# 300 epochs of full-batch gradient descent with a learning rate of 0.1.
X_train, Y_train, X_test, Y_test = preprocess_data(train_images, train_labels, test_images, test_labels)
train(X_train, Y_train, X_test, Y_test, epochs=300, learning_rate=0.1)

Epoch 0, Training loss: 2.302585
Test accuracy : 0.102

Epoch 1, Training loss: 2.302585
Test accuracy : 0.1052

Epoch 2, Training loss: 2.302585
Test accuracy : 0.1096

Epoch 3, Training loss: 2.302585
Test accuracy : 0.1141

Epoch 4, Training loss: 2.302584
Test accuracy : 0.1202

Epoch 5, Training loss: 2.302584
Test accuracy : 0.1243

Epoch 6, Training loss: 2.302584
Test accuracy : 0.1284

Epoch 7, Training loss: 2.302584
Test accuracy : 0.1307

Epoch 8, Training loss: 2.302584
Test accuracy : 0.1325

Epoch 9, Training loss: 2.302584
Test accuracy : 0.1352

Epoch 10, Training loss: 2.302584
Test accuracy : 0.135

Epoch 11, Training loss: 2.302584
Test accuracy : 0.1359

Epoch 12, Training loss: 2.302584
Test accuracy : 0.1384

Epoch 13, Training loss: 2.302584
Test accuracy : 0.1401

Epoch 14, Training loss: 2.302584
Test accuracy : 0.1432

Epoch 15, Training loss: 2.302584
Test accuracy : 0.1458

Epoch 16, Training loss: 2.302584
Test accuracy : 0.1482

Epoch 17, Training loss: 2

Epoch 142, Training loss: 2.302583
Test accuracy : 0.1443

Epoch 143, Training loss: 2.302583
Test accuracy : 0.1453

Epoch 144, Training loss: 2.302583
Test accuracy : 0.1458

Epoch 145, Training loss: 2.302583
Test accuracy : 0.1464

Epoch 146, Training loss: 2.302583
Test accuracy : 0.1475

Epoch 147, Training loss: 2.302583
Test accuracy : 0.148

Epoch 148, Training loss: 2.302583
Test accuracy : 0.1485

Epoch 149, Training loss: 2.302583
Test accuracy : 0.1497

Epoch 150, Training loss: 2.302583
Test accuracy : 0.1497

Epoch 151, Training loss: 2.302583
Test accuracy : 0.1497

Epoch 152, Training loss: 2.302583
Test accuracy : 0.1509

Epoch 153, Training loss: 2.302583
Test accuracy : 0.1514

Epoch 154, Training loss: 2.302583
Test accuracy : 0.1527

Epoch 155, Training loss: 2.302583
Test accuracy : 0.1528

Epoch 156, Training loss: 2.302583
Test accuracy : 0.1541

Epoch 157, Training loss: 2.302583
Test accuracy : 0.1564

Epoch 158, Training loss: 2.302583
Test accuracy : 0.1569

Epoch 282, Training loss: 2.302582
Test accuracy : 0.1942

Epoch 283, Training loss: 2.302582
Test accuracy : 0.1942

Epoch 284, Training loss: 2.302582
Test accuracy : 0.1941

Epoch 285, Training loss: 2.302582
Test accuracy : 0.1943

Epoch 286, Training loss: 2.302582
Test accuracy : 0.1941

Epoch 287, Training loss: 2.302582
Test accuracy : 0.1941

Epoch 288, Training loss: 2.302582
Test accuracy : 0.1943

Epoch 289, Training loss: 2.302582
Test accuracy : 0.1945

Epoch 290, Training loss: 2.302582
Test accuracy : 0.1949

Epoch 291, Training loss: 2.302582
Test accuracy : 0.195

Epoch 292, Training loss: 2.302582
Test accuracy : 0.195

Epoch 293, Training loss: 2.302582
Test accuracy : 0.195

Epoch 294, Training loss: 2.302582
Test accuracy : 0.1949

Epoch 295, Training loss: 2.302582
Test accuracy : 0.195

Epoch 296, Training loss: 2.302582
Test accuracy : 0.195

Epoch 297, Training loss: 2.302582
Test accuracy : 0.1951

Epoch 298, Training loss: 2.302582
Test accuracy : 0.1953

Ep