In [1]:
from keras.datasets import mnist     # Get MNIST dataset from Keras
from keras.models import Sequential  # Model type to be used
from keras.layers import Dense, Dropout, Activation # Types of layers for the model
from keras.utils import to_categorical  # for converting array of labeled data to one-hot vector. 

import numpy as np                   # advanced math library

2023-12-05 23:32:34.202310: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# 60,000 for training, 10000 for test
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [3]:
# Reshape 28*28 matrices into vectors of length 784 (28 * 28 = 784)
X_train = X_train.reshape(60000, 784, 1) 
X_test = X_test.reshape(10000, 784, 1)

# change integers to 32-bit floating point numbers
X_train = X_train.astype('float32')  
X_test = X_test.astype('float32')

#OHE on y
Y_train = to_categorical(y_train)
Y_test = to_categorical(y_test)

# normalize the inputs to be in the range [0-1] instead of [0-255].
X_train /= 255                       
X_test /= 255

In [4]:
# model is a linear stack of layers
model = Sequential()
# The first hidden layer has 500 neurons
model.add(Dense(500, input_shape=(784,))) # 784 is the length of vector
model.add(Activation('relu'))
model.add(Dropout(0.2)) # protect the model from overfitting 
# The second hidden layer has 500 neurons, too
model.add(Dense(500))
model.add(Activation('relu'))
model.add(Dropout(0.2))
# The final layer is output that has 10 classes
model.add(Dense(10))
model.add(Activation('softmax'))

# Summarize the built model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 500)               392500    
                                                                 
 activation (Activation)     (None, 500)               0         
                                                                 
 dropout (Dropout)           (None, 500)               0         
                                                                 
 dense_1 (Dense)             (None, 500)               250500    
                                                                 
 activation_1 (Activation)   (None, 500)               0         
                                                                 
 dropout_1 (Dropout)         (None, 500)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                5

In [5]:
# categorical cross-entropy is a loss function to comparing two probability distributions.
# use the Adam optimizer for learning
# The optimizer helps determine how quickly the model learns through gradient descent. 

#can also print out other metrics too: eg. F1Score
from keras import metrics

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', metrics.F1Score()])

In [6]:
#Training
model.fit(X_train, Y_train, batch_size=128, epochs=5, verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1365b6310>

In [7]:
#Evaluation
score = model.evaluate(X_test, Y_test)
print('Test score:', score[0])
print('Test accuracy:', score[1])
print('Test F1-Score:', np.max(score[2]))

Test score: 0.06137816607952118
Test accuracy: 0.9814000129699707
Test F1-Score: 0.9902999


In [8]:
print(score)

[0.06137816607952118, 0.9814000129699707, array([0.9872644 , 0.9902999 , 0.98295176, 0.9792285 , 0.9810548 ,
       0.9815126 , 0.9844236 , 0.9809291 , 0.97344226, 0.97187954],
      dtype=float32)]
