In [109]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from sklearn import preprocessing

In [110]:
# Load MNIST: 60000 training images and 10000 test images, each with an integer label
(imageTrain, labelTrain), (imageTest, labelTest) = tf.keras.datasets.mnist.load_data()

# Hold out the last 5000 training samples for validation; the first 55000 stay for training.
# "Raw" arrays keep the original 2d image layout and integer labels.
imageTrainRaw, imageValidRaw = imageTrain[:55000], imageTrain[55000:]
labelTrainRaw, labelValidRaw = labelTrain[:55000], labelTrain[55000:]

# "Pro" (processed) images: every image flattened to a 784-vector, then rescaled
# row by row with sklearn's max-norm (each sample is divided by its own largest value).
imageTrainPro = preprocessing.normalize(np.reshape(imageTrainRaw, (55000, 784)), norm="max")
imageValidPro = preprocessing.normalize(np.reshape(imageValidRaw, (5000, 784)), norm="max")

# "Pro" labels: one-hot rows of length 10, with a 1 in the column of the digit label
labelTrainPro = np.zeros((55000, 10))
labelTrainPro[np.arange(55000), labelTrainRaw] = 1
labelValidPro = np.zeros((5000, 10))
labelValidPro[np.arange(5000), labelValidRaw] = 1

In [111]:
# Visualizing an image array
def visualizeArr(idx):
    """Print the pixel grid of training image ``idx`` and save it as text.

    Every pixel value of ``imageTrainRaw[idx]`` is left-aligned in a
    fixed-width column so the digit is recognisable when read as text.
    The same grid is written to ``./test/imageArr.txt``; columns in the file
    are one character narrower than on screen because ``print`` adds an
    extra separator after each cell.

    Args:
        idx: Index of the image inside the global ``imageTrainRaw`` array.
    """
    import os  # local import: only needed to create the output directory

    lines = []
    for row in imageTrainRaw[idx]:
        # Pad 1- and 2-character values to 3 characters, then add one separator
        # space; longer values just get the single separator.
        cells = [str(col).ljust(3) + " " for col in row]
        print("".join(cell + " " for cell in cells))
        lines.append("".join(cells) + "\n")

    # Create the target directory first so a fresh checkout does not fail
    # with FileNotFoundError when the file is opened.
    os.makedirs("./test", exist_ok=True)
    with open("./test/imageArr.txt", "w") as file:
        file.write("".join(lines))
# Plotting an image
def plotImage(idx):
    """Draw training image ``idx`` in grayscale and save the figure.

    The plot title shows the label stored in ``labelTrainRaw[idx]``. The
    figure is saved to ``./test/digit_grey.jpg``.

    Args:
        idx: Index of the sample inside the global ``imageTrainRaw`` /
            ``labelTrainRaw`` arrays.
    """
    import os  # local import: only needed to create the output directory

    plt.title(f"Digit {labelTrainRaw[idx]}")
    plt.imshow(imageTrainRaw[idx], cmap='gray')
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs("./test", exist_ok=True)
    plt.savefig("./test/digit_grey.jpg")

In [122]:
# Layer widths of the network architecture, from input to output
inputLayer = 28 * 28  # one input unit per pixel of a 28x28 image (784)
hiddenLayer1 = 512  # width of the first hidden layer
hiddenLayer2 = 256  # width of the second hidden layer
hiddenLayer3 = 128  # width of the third hidden layer
outputLayer = 10  # one output unit per digit class (0-9)

In [131]:
# SGD settings: step size (10^-4) and the iteration budget
iterations = 300000
learningRate = np.float_power(10, -4)

# Random starting parameters drawn from [0, 1): a (10, 784) weight matrix
# (one row of pixel weights per digit class) and a length-10 bias vector.
theta = np.random.rand(10, 784)
b = np.random.rand(10)

# Softmax: turns a vector of class scores into probabilities
def softmax(y):
    """Return the softmax of ``y``, normalised along the first axis.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged (the factor cancels in the ratio) but keeps
    ``np.exp`` from overflowing to ``inf`` on large scores, which would turn
    the output into NaN.

    Args:
        y: Array-like of scores. For a 1-D vector the result sums to 1.

    Returns:
        numpy array with the same shape as ``y``.
    """
    y = np.asarray(y)
    shifted = y - np.max(y, axis=0)
    e = np.exp(shifted)
    # axis=0 matches what the builtin sum() over the array used to compute.
    return e / np.sum(e, axis=0)

def fLoss(theta, b, idx): # Using cross-entropy and SGD
    """Cross-entropy loss of the softmax model on a single training sample.

    Args:
        theta: Weight matrix applied to the flattened image (``theta.dot(x)``).
        b: Bias vector added to the class scores.
        idx: Index of the sample in the global ``imageTrainPro`` /
            ``labelTrainPro`` arrays.

    Returns:
        ``-sum(yGiven * log(yCal))`` over the classes with non-zero target.
    """
    x = imageTrainPro[idx]
    yGiven = labelTrainPro[idx]
    yCal = softmax(theta.dot(x) + b)
    # Only classes with non-zero target mass contribute. Skipping the others
    # avoids 0 * log(0) = NaN when a non-target probability underflows to 0.
    active = yGiven != 0
    return -np.sum(yGiven[active] * np.log(yCal[active]))

def dfLoss(theta, b, idx): # Using SGD
    """Gradient of the single-sample cross-entropy loss w.r.t. ``theta`` and ``b``.

    Args:
        theta: Weight matrix applied to the flattened image.
        b: Bias vector added to the class scores.
        idx: Index of the sample in the global ``imageTrainPro`` /
            ``labelTrainPro`` arrays.

    Returns:
        Tuple ``(grad_w, grad_b)``: ``grad_b`` is the prediction error
        (softmax output minus one-hot target) and ``grad_w`` is the (10, 784)
        product of that error, as a column, with the input pixels, as a row.
    """
    pixels = imageTrainPro[idx]
    target = labelTrainPro[idx]
    probs = softmax(theta.dot(pixels) + b)
    error = probs - target
    grad_w = error.reshape(10, 1) @ pixels.reshape(1, 784)
    return (grad_w, error)

# Stochastic gradient descent: one randomly chosen training sample per step.
time = 0
lossPrev = fLoss(theta, b, np.random.randint(0, 55000))
# Start from the initial parameters so thetaRes / bRes are always defined,
# even if no step ever improves on lossPrev.
thetaRes, bRes = theta, b
# Bound the run by the declared `iterations` budget. The loss threshold below
# may never be reached, which previously left the loop running forever.
for time in range(1, iterations + 1):
    idx = np.random.randint(0, 55000)
    (dloss_w, dloss_b) = dfLoss(theta, b, idx)
    theta = theta - learningRate * dloss_w
    b = b - learningRate * dloss_b
    # Loss on the same sample after the update.
    # NOTE(review): this is a single-sample loss, so "best so far" is a noisy
    # criterion for picking thetaRes / bRes - confirm this is intended.
    loss = fLoss(theta, b, idx)
    if(loss < lossPrev):
        lossPrev = loss
        thetaRes = theta
        bRes = b
        print(f"[{time}] Loss: {loss}")
        # Early exit once the tracked loss drops below 10^-6.
        if(loss < np.float_power(10, -6)):
            break
        
# Validation accuracy of the stored parameters (thetaRes, bRes):
# fraction of the 5000 held-out images whose most probable class equals the label.
numValid = 5000
count = sum(
    1
    for i in range(numValid)
    if np.argmax(softmax(thetaRes.dot(imageValidPro[i]) + bRes)) == labelValidRaw[i]
)
print(count / numValid)

[1] Loss: 3.0349079817602327
[7] Loss: 2.889634000889487
[11] Loss: 0.4820046257753735
[34] Loss: 0.35304538736984353
[131] Loss: 0.08547753349005642
[234] Loss: 0.030342296315341338
[999] Loss: 0.01676437018575291
[16291] Loss: 0.015363532767866037
[17982] Loss: 0.006971818526132726
[33761] Loss: 0.0018920435871449901
[45104] Loss: 0.0014984887651255745
[51408] Loss: 0.0010386233673721527
[61646] Loss: 0.0009357457795374462
[71668] Loss: 0.000879162776316626
[81719] Loss: 0.000559586164175905
[88923] Loss: 0.0005181628668953624
[103641] Loss: 0.00043907039294440153
[107505] Loss: 0.00043229261407406544
[108386] Loss: 0.00033063453888280134
[108508] Loss: 0.0003252642281684447
[120761] Loss: 0.0002968712175398353
[122735] Loss: 0.0001541354352387583
[127262] Loss: 0.0001343806461181574
[136302] Loss: 9.966554411587004e-05
[143821] Loss: 8.821208597221959e-05
[179797] Loss: 5.80408445504169e-05
[192150] Loss: 5.6110395316707534e-05
[196926] Loss: 5.0317806893039824e-05
[199115] Loss: 5.

In [None]:
# Validation accuracy of the current parameters (theta, b):
# fraction of the 5000 held-out images whose most probable class equals the label.
numValid = 5000
count = sum(
    1
    for i in range(numValid)
    if np.argmax(softmax(theta.dot(imageValidPro[i]) + b)) == labelValidRaw[i]
)
print(count / numValid)