In [16]:
import numpy as np
from sklearn.metrics import accuracy_score
import tensorflow as tf

# Load the Fashion MNIST dataset
fashion_mnist = tf.keras.datasets.fashion_mnist

# Split the dataset into training and test sets
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Normalize the pixel values of the images to be between 0 and 1
train_images = train_images / 255.0
test_images = test_images / 255.0

In [3]:
(x_train,y_train),(x_test,y_test) = (train_images, train_labels), (test_images, test_labels)

In [4]:
from sklearn.preprocessing import LabelEncoder

# Initialize the encoder
label_encoder = LabelEncoder()

# Fit the encoder and transform the labels to integers
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

*PARAMETER* 

In [170]:

def param_initialization(nx, nh1, nh2, nh3, nh4, ny):
    W = {
        'w1': np.random.randn(nh1, nx) * 0.01,
        'w2': np.random.randn(nh2, nh1) * 0.01,
        'w3': np.random.randn(nh3, nh2) * 0.01,
        'w4': np.random.randn(nh4, nh3) * 0.01,
        'w5': np.random.randn(ny, nh4) * 0.01
    }
    
    b = {
        'b1': np.zeros((nh1, 1)),
        'b2': np.zeros((nh2, 1)),
        'b3': np.zeros((nh3, 1)),
        'b4': np.zeros((nh4, 1)),
        'b5': np.zeros((ny, 1))
    }

    return W,b

In [160]:
def relu(x):
    return np.maximum(0, x)

def softmax(x):
    exps = np.exp(x - np.max(x, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0, keepdims=True)


In [162]:
def fwd_prop(x, W, b):
    A = {}
    Z = {}
    
    Z['z1'] = np.dot(W['w1'], x) + b['b1']
    A['a1'] = relu(Z['z1'])

    Z['z2'] = np.dot(W['w2'], A['a1']) + b['b2']
    A['a2'] = relu(Z['z2'])

    Z['z3'] = np.dot(W['w3'], A['a2']) + b['b3']
    A['a3'] = relu(Z['z3'])

    Z['z4'] = np.dot(W['w4'], A['a3']) + b['b4']
    A['a4'] = relu(Z['z4'])

    Z['z5'] = np.dot(W['w5'], A['a4']) + b['b5']
    A['a5'] = softmax(Z['z5'])

    return Z, A

    

In [163]:
def relu_derivative(r):
    return np.where(r > 0, 1, 0)


def softmax_derivative(r):
    return softmax(r) * (1 - softmax(r))

In [164]:

def backward(W, b, x, Z, A, y, lr):
    m = x.shape[1]  # number of examples

    dL_dz5 = A['a5'] - y
    dL_dw5 = (1 / m) * np.dot(dL_dz5, A['a4'].T)
    dL_db5 = (1 / m) * np.sum(dL_dz5, axis=1, keepdims=True)

    dL_da4 = np.dot(W['w5'].T, dL_dz5)
    dL_dz4 = dL_da4 * relu_derivative(Z['z4'])
    dL_dw4 = (1 / m) * np.dot(dL_dz4, A['a3'].T)
    dL_db4 = (1 / m) * np.sum(dL_dz4, axis=1, keepdims=True)

    dL_da3 = np.dot(W['w4'].T, dL_dz4)
    dL_dz3 = dL_da3 * relu_derivative(Z['z3'])
    dL_dw3 = (1 / m) * np.dot(dL_dz3, A['a2'].T)
    dL_db3 = (1 / m) * np.sum(dL_dz3, axis=1, keepdims=True)

    dL_da2 = np.dot(W['w3'].T, dL_dz3)
    dL_dz2 = dL_da2 * relu_derivative(Z['z2'])
    dL_dw2 = (1 / m) * np.dot(dL_dz2, A['a1'].T)
    dL_db2 = (1 / m) * np.sum(dL_dz2, axis=1, keepdims=True)

    dL_da1 = np.dot(W['w2'].T, dL_dz2)
    dL_dz1 = dL_da1 * relu_derivative(Z['z1'])
    dL_dw1 = (1 / m) * np.dot(dL_dz1, x.T)
    dL_db1 = (1 / m) * np.sum(dL_dz1, axis=1, keepdims=True)

    # Update weights
    W['w1'] -= lr * dL_dw1
    W['w2'] -= lr * dL_dw2
    W['w3'] -= lr * dL_dw3
    W['w4'] -= lr * dL_dw4
    W['w5'] -= lr * dL_dw5

    # Update biases
    b['b1'] -= lr * dL_db1
    b['b2'] -= lr * dL_db2
    b['b3'] -= lr * dL_db3
    b['b4'] -= lr * dL_db4
    b['b5'] -= lr * dL_db5

    return W, b    

In [189]:
def model(x_train, y_train, W, b, lr, epochs, batch_size):
    n_batches = int(x_train.shape[0] / batch_size)  # Calculate the number of batches
    
    for epoch in range(epochs):
        for i in range(x_train.shape[0]):
            
            x = x_train[i:i+1].T  # Select a batch of examples
            y = y_train[i:i+1].T  # Select the corresponding labels
            
            # Forward propagation for the batch
            Z, A = fwd_prop(x, W, b)
            
            # Backward propagation for the batch
            W, b = backward(W, b, x, Z, A, y, lr)
        
        # Prediction and accuracy calculation for the entire training set (for simplicity)
        _, A_last = fwd_prop(x_train.T, W, b)
        predictions = predict(A_last)
        accuracy = np.mean(predictions == np.argmax(y_train.T, axis=0)) * 100  # Assuming y_train is one-hot encoded
        
        print(f"Epoch {epoch + 1}/{epochs} complete. Accuracy: {accuracy:.2f}%")

In [186]:
def predict(A_last):
    return np.argmax(A_last, axis=0)
    

In [167]:
x_train = x_train.reshape(x_train.shape[0], -1)
# Assuming you have x_test similar to x_train
x_test = x_test.reshape(x_test.shape[0], -1)


In [190]:

A = {}
lr = 0.01
epochs = 5
W,b = param_initialization(784, 256, 128, 64, 32, 10)
model(x_train, y_train, W, b, lr, epochs,64)

  exps = np.exp(x - np.max(x, axis=0, keepdims=True))


Epoch 1/5 complete. Accuracy: 100.00%
Epoch 2/5 complete. Accuracy: 100.00%


KeyboardInterrupt: 

In [191]:
def check_accuracy(x_test, y_test, W, b):
    # Forward propagation on test data
    _, A_last = fwd_prop(x_test.T, W, b)
    
    # Prediction
    predictions = predict(A_last)
    
    # Directly compare predictions with y_test if y_test is not one-hot encoded
    accuracy = np.mean(predictions == y_test) * 100  # Adjusted line
    return accuracy

# Assuming x_test and y_test are your test dataset and labels
accuracy = check_accuracy(x_test, y_test, W, b)
print(f"Test Accuracy: {accuracy:.2f}%")

Test Accuracy: 10.00%
