In [1]:
## Importing the packages 

import numpy as np 
from matplotlib import pyplot as plt

In [2]:
# MNIST dataset (train + test)

def download_minist_dataset():
    # downloads and unzips the mnist dataset to the computer
    
    import os
    import urllib.request
    import urllib 
    from zipfile import ZipFile
    URL = 'https://nnfs.io/datasets/fashion_mnist_images.zip'
    FILE = 'fashion_mnist_images.zip'
    FOLDER = 'fashion_mnist_images'
    if not os.path.isfile(FILE):
        print(f'Downloading {URL} and saving as {FILE}...')
        urllib.request.urlretrieve(URL, FILE)
    print('Unzipping images...')
    with ZipFile(FILE) as zip_images:
        zip_images.extractall(FOLDER)
    print('Done!')


    
def load_mnist_dataset(dataset, path):
    # loads and preprocesses the mnist dataset

    import cv2 # to install: pip install opencv-python
    import os
    # Scan all the directories and create a list of labels
    labels = os.listdir(os.path.join(path, dataset))
    # Create lists for samples and labels
    X = []
    y = []
    # For each label folder
    for label in labels:
        # And for each image in given folder
        for file in os.listdir(os.path.join(path, dataset, label)):
            # Read the image
            image = cv2.imread(os.path.join(
                path, dataset, label, file), cv2.IMREAD_UNCHANGED)
            # And append it and a label to the lists
            X.append(image)
            y.append(label)
    # Convert the data to proper numpy arrays and return
    return np.array(X), np.array(y).astype('uint8')

def create_data_mnist(path):
    # Load both sets separately
    X_train, y_train= load_mnist_dataset('train', path)
    X_test, y_test = load_mnist_dataset('test', path)
    # And return all the data
    return X_train, y_train, X_test, y_test




In [14]:
# loading the dataset 
X_train, y_train = load_mnist_dataset('train', 'fashion_mnist_images')
X_test, y_test = load_mnist_dataset('test', 'fashion_mnist_images')

In [15]:
## just checking 
X_train.shape

(60000, 28, 28)

In [16]:
## convert 2D array to 1D..i.e from 28 by 28 to 784
X_train = X_train/255.
X_test = X_test/255.

num_pixels = 784
X_train = X_train.reshape(X_train.shape[0],
                         num_pixels)
X_test = X_test.reshape(X_test.shape[0],
                         num_pixels)

## just checking 
print(X_train.shape, X_test.shape)

(60000, 784) (10000, 784)


In [17]:
## Shuffuling train data 
np.random.seed(2305) 
np.random.shuffle(X_train)

In [18]:
## Transpose X_train 
X_train = X_train.T 

In [62]:
def init_params():
    W1 = np.random.rand(10, 784) ###  challenge 1
    b1 = np.random.rand(10, 1) 
    W2 = np.random.rand(10, 10) 
    b2 = np.random.rand(10, 1)   # same bias 
    return W1, b1, W2, b2

def ReLU(Z):
    return np.maximum(Z, 0)

def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A
    
def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

def ReLU_deriv(Z):
    return Z > 0

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):### challenge 2 
    m = Y.size
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2) ##
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)#2) ## correct ## Average 
    #db1 = 1 / m * np.mean(dZ1)## update 
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1    
    W2 = W2 - alpha * dW2  
    b2 = b2 - alpha * db2    
    return W1, b1, W2, b2

In [63]:
def get_predictions(A2):    
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2 = init_params()
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        if i % 10 == 0:
            print("Iteration: ", i)
            print("Accuracy: ", get_accuracy(get_predictions(A2), Y))
    return W1, b1, W2, b2

In [64]:
# running gradient descent 
W1, b1, W2, b2 = gradient_descent(X_train, y_train, 0.1, 500)

  A = np.exp(Z) / sum(np.exp(Z))
  A = np.exp(Z) / sum(np.exp(Z))
  A = np.exp(Z) / sum(np.exp(Z))


Iteration:  0
[0 5 5 ... 0 5 5] [0 0 0 ... 9 9 9]
Accuracy:  0.09906666666666666
Iteration:  10
[0 0 0 ... 0 0 0] [0 0 0 ... 9 9 9]
Accuracy:  0.1
Iteration:  20
[0 0 0 ... 0 0 0] [0 0 0 ... 9 9 9]
Accuracy:  0.1
Iteration:  30
[0 0 0 ... 0 0 0] [0 0 0 ... 9 9 9]
Accuracy:  0.1
Iteration:  40
[0 0 0 ... 0 0 0] [0 0 0 ... 9 9 9]
Accuracy:  0.1
Iteration:  50
[0 0 0 ... 0 0 0] [0 0 0 ... 9 9 9]
Accuracy:  0.1


KeyboardInterrupt: 

In [None]:
## My Challenges 
#----------------------------#

#1. innit_params function 
#2. backward_prop function: I couldnt get the formula correctly
 
#I had a lot of errors trying to change the formula
