In [1]:
from tensorflow import keras

In [2]:
keras.__version__

'2.8.0'

Load the dataset from keras API: https://keras.io/api/datasets/mnist/

In [18]:
mnist_dataset = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist_dataset.load_data()

In [19]:
print("X_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)

X_train shape: (60000, 28, 28)
y_train shape: (60000,)


Let us create training and validation set from the training dataset that we have.

In [20]:
X_val, X_train = x_train[:5000], x_train[5000:]   #Giving the first 5000 to the development set
y_val, y_train = y_train[:5000], y_train[5000:]

How does the training data looks?

In [21]:
print(X_train[0])

[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0  97  96  77 118  61   0   0
    0   0   0   0   0   0   0   0   0   0]
 [  0   0   0  90 138 235 235 235 235 235 235 251 251 248 254 24

Normalize the dataset

In [22]:
X_train, X_val = X_train/255.0, X_val/255.0

Transform the labels using one hot encoding

In [23]:
y_train = keras.utils.to_categorical(y_train, 10)
y_val = keras.utils.to_categorical(y_val, 10)
y_test = keras.utils.to_categorical(y_test, 10)

In [24]:
y_train[0]

array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.], dtype=float32)

Creating the model

In [27]:
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(250, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))

In [28]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 250)               196250    
                                                                 
 dense_1 (Dense)             (None, 10)                2510      
                                                                 
Total params: 198,760
Trainable params: 198,760
Non-trainable params: 0
_________________________________________________________________


To get the list of all the layers

In [29]:
model.layers

[<keras.layers.core.flatten.Flatten at 0x7fe6cece4350>,
 <keras.layers.core.dense.Dense at 0x7fe6cefcac90>,
 <keras.layers.core.dense.Dense at 0x7fe6cec66610>]

To get weights and biases of any layer:

In [31]:
weights, biases = model.layers[1].get_weights()

In [34]:
weights

array([[ 0.00289489,  0.01895425,  0.02019456, ...,  0.04611234,
         0.03202727, -0.05436274],
       [-0.02375793,  0.04661249, -0.01192001, ..., -0.03058499,
         0.01915818,  0.04336067],
       [ 0.03305327,  0.00076687, -0.0375293 , ...,  0.0426162 ,
        -0.07544801, -0.01977089],
       ...,
       [ 0.0701687 , -0.05909745,  0.03085898, ..., -0.05985919,
        -0.00929176,  0.04315221],
       [-0.05492038,  0.0357836 ,  0.06926721, ..., -0.06994479,
        -0.07449681,  0.07427424],
       [ 0.00992358,  0.02838346, -0.00557917, ...,  0.01547944,
         0.03796373, -0.05709774]], dtype=float32)

In [33]:
weights.shape

(784, 250)

In [35]:
biases.shape

(250,)

Compiling the model

In [39]:
model.compile(loss="categorical_crossentropy", 
              optimizer="sgd",
              metrics=["accuracy"])

In [40]:
history = model.fit(X_train, y_train, epochs =5, validation_data=(X_val, y_val))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [42]:
model.evaluate(x_test, y_test)



[28.398277282714844, 0.9358999729156494]

In [48]:
y_pred = model.predict(x_test[:5])

In [49]:
y_pred

array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)