In [1]:
from keras.datasets import mnist
from model import Layer, NeuralNetwork
import numpy as np
import tensorflow as tf

In [2]:
# Load the training and testing sets
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Print the shapes to verify
print(f"Training images shape: {train_images.shape}")
print(f"Training labels shape: {train_labels.shape}")
print(f"Testing images shape: {test_images.shape}")
print(f"Testing labels shape: {test_labels.shape}")

Training images shape: (60000, 28, 28)
Training labels shape: (60000,)
Testing images shape: (10000, 28, 28)
Testing labels shape: (10000,)


In [3]:
train_images = train_images.reshape(train_images.shape[0],-1)/255
train_labels = train_labels.reshape(-1, 1)
training_data = np.concatenate((train_images, train_labels), axis=1)

dataset = tf.data.Dataset.from_tensor_slices(training_data)
batched_data = dataset.batch(batch_size=100)

In [4]:
def cross_entropy(y_pred, y_true):
    epsilon = 1e-12
    y_pred = np.clip(y_pred, epsilon, 1. - epsilon)
    return -np.sum(y_true * np.log(y_pred))

def d_cross_entropy(y_pred, y_true):
    epsilon = 1e-12
    y_pred = np.clip(y_pred, epsilon, 1. - epsilon)
    return -(y_true / y_pred)

In [5]:
layer1 = Layer(784, 64, activation='relu')
layer2 = Layer(64, 32, activation='relu')
layer3 = Layer(32,10,activation='softmax')

In [6]:
for epoch in range(20):
    cost_history = []
    for j,data in enumerate(batched_data.as_numpy_iterator()):
        X=data[:, 0:784]
        y=data[:, 784]
        y = np.reshape(y,(1,-1))
        y = y.flatten()
        y = np.eye(10)[y.astype(int)]
        
        avg_cost = 0
        learning_rate = 0.01
        
        for i in range(X.shape[0]):
            # print(np.reshape(X[i], (-1,1)).shape)
            x_i = np.reshape(X[i], (-1, 1))
            y_i = np.reshape(y[i], (-1, 1))
            activation1 = layer1.forward(np.reshape(X[i], (-1,1)))
            activation2 = layer2.forward(activation1)
            activation3 = layer3.forward(activation2)
            
            cost = cross_entropy(activation3, y_i)
            cost_history.append(cost)
            
            d_a3 = activation3 - y_i
            d_a2 = layer3.backward(d_a3, learning_rate)
            d_a1 = layer2.backward(d_a2, learning_rate)
            _ = layer1.backward(d_a1, learning_rate)
            
    avg_cost = np.average(cost_history)
    print(f"Average Cost for epoch {epoch+1}: {avg_cost}")
            

Average Cost for epoch 1: 0.2403582843748576
Average Cost for epoch 2: 0.12416441966374471
Average Cost for epoch 3: 0.0954003057459882
Average Cost for epoch 4: 0.08191441771402529
Average Cost for epoch 5: 0.06979456812197284
Average Cost for epoch 6: 0.062400110960592156
Average Cost for epoch 7: 0.057536780653717166
Average Cost for epoch 8: 0.05289985111021666
Average Cost for epoch 9: 0.052732182205348116
Average Cost for epoch 10: 0.04992760923332009
Average Cost for epoch 11: 0.044973980872036294
Average Cost for epoch 12: 0.041378553535727766
Average Cost for epoch 13: 0.039959247581745586
Average Cost for epoch 14: 0.037565615872332994


KeyboardInterrupt: 

In [7]:
correct = 0
total = 0

for i in range(test_images.shape[0]):
    activation1 = layer1.forward(np.reshape(test_images[i], (-1,1)))
    activation2 = layer2.forward(activation1)
    activation3 = layer3.forward(activation2)
    
    y_pred = np.argmax(activation3)
    if y_pred == test_labels[i]: 
        correct+=1
    total+=1

print(f"Accuracy: {correct * 100 / total}")
    

Accuracy: 96.55


In [8]:
model = NeuralNetwork()
model.add(layer1)
model.add(layer2)
model.add(layer3)

model.save("model.npz")