In [25]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [26]:
# Load MNIST as two (images, labels) pairs: a training split and a test split.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [27]:
# Sanity-check the training images and labels: print each array's shape.
for split in (x_train, y_train):
    print(split.shape)

(60000, 28, 28)
(60000,)


In [28]:
# Flatten each 28x28 image into a 784-long float32 vector and divide by 255
# so 8-bit pixel intensities land in [0, 1].
# The cell overwrites x_train / x_test, so it is guarded on ndim: re-running it
# (easy to do in a notebook) must not divide already-normalized data by 255 again.
if x_train.ndim == 3:
    x_train = x_train.reshape(-1, 28 * 28).astype('float32') / 255.0
if x_test.ndim == 3:
    x_test = x_test.reshape(-1, 28 * 28).astype('float32') / 255.0

## Sequential API

In [30]:
# Sequential API: convenient but not flexible -- a plain stack of layers that
# maps exactly one input to one output.
seq_model = keras.Sequential(
    [
        # `shape` must be a tuple. `(28*28)` is just the int 784, so the
        # trailing comma is needed to describe one flattened image.
        keras.Input(shape=(28 * 28,)),
        layers.Dense(512, activation='relu'),   # fully connected layer
        layers.Dense(256, activation='relu'),
        layers.Dense(10),                       # no activation: emits 10 raw logits
    ]
)

# compile() configures how the model is trained and scored: loss, optimizer
# and the metrics reported by fit()/evaluate().
seq_model.compile(
    # The sparse variant takes integer class labels; plain
    # CategoricalCrossentropy would need one-hot encoded labels.
    # from_logits=True because the last Dense layer applies no softmax.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [31]:
# Train for 5 epochs in mini-batches of 32; verbose=2 logs one line per epoch.
seq_model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    verbose=2,
)

Epoch 1/5
1875/1875 - 9s - loss: 0.1878 - accuracy: 0.9425
Epoch 2/5
1875/1875 - 8s - loss: 0.0808 - accuracy: 0.9754
Epoch 3/5
1875/1875 - 8s - loss: 0.0543 - accuracy: 0.9825
Epoch 4/5
1875/1875 - 8s - loss: 0.0401 - accuracy: 0.9867
Epoch 5/5
1875/1875 - 8s - loss: 0.0316 - accuracy: 0.9901


<tensorflow.python.keras.callbacks.History at 0x7fe1256575d0>

In [19]:
# Score the Sequential model on the test split; returns [loss, accuracy].
# NOTE(review): the saved output of this cell (~10% accuracy, execution count 19)
# appears to predate the training run recorded for this model (execution count 31).
# Re-run this cell after fit() so the recorded result reflects the trained model.
seq_model.evaluate(
    x_test,
    y_test,
    batch_size=32,
    verbose=2,
)

313/313 - 1s - loss: 2.3561 - accuracy: 0.1019


[2.3561339378356934, 0.10189999639987946]

## Functional API

In [21]:
# Functional API: a little more flexible than Sequential -- each layer is
# called on a tensor, so the graph is wired up explicitly.
# `shape` must be a tuple. `(28*28)` is just the int 784, hence the trailing comma.
inputs = keras.Input(shape=(28 * 28,))
x = layers.Dense(512, activation='relu')(inputs)
x = layers.Dense(256, activation='relu')(x)
# softmax here means the model emits probabilities, so the loss used with it
# must be configured with from_logits=False.
outputs = layers.Dense(10, activation='softmax')(x)
func_model = keras.Model(inputs=inputs, outputs=outputs)

In [22]:
# Attach optimizer, loss and reported metrics to the functional model.
# from_logits=False: the loss is told its inputs are probabilities, not raw logits.
# The sparse loss takes integer labels; CategoricalCrossentropy would need one-hot labels.
func_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [23]:
# Train for 5 epochs in mini-batches of 32; verbose=2 logs one line per epoch.
func_model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=5,
    verbose=2,
)

Epoch 1/5
1875/1875 - 9s - loss: 0.1863 - accuracy: 0.9431
Epoch 2/5
1875/1875 - 8s - loss: 0.0776 - accuracy: 0.9759
Epoch 3/5
1875/1875 - 8s - loss: 0.0554 - accuracy: 0.9829
Epoch 4/5
1875/1875 - 8s - loss: 0.0401 - accuracy: 0.9872
Epoch 5/5
1875/1875 - 8s - loss: 0.0318 - accuracy: 0.9898


<tensorflow.python.keras.callbacks.History at 0x7fe130d82750>

In [24]:
# Score the functional model on the test split; returns [loss, accuracy].
func_model.evaluate(
    x_test,
    y_test,
    batch_size=32,
    verbose=2,
)

313/313 - 1s - loss: 0.0763 - accuracy: 0.9789


[0.07633870095014572, 0.9789000153541565]

# Testing Different Optimizers

In [37]:
# Same 784 -> 512 -> 256 -> 10 network as the Sequential example, trained with
# plain SGD (small momentum, no Nesterov) to compare optimizers.
SGD_model = keras.Sequential(
    [
        # `shape` must be a tuple. `(28*28)` is just the int 784, so the
        # trailing comma is needed to describe one flattened image.
        keras.Input(shape=(28 * 28,)),
        layers.Dense(512, activation='relu'),   # fully connected layer
        layers.Dense(256, activation='relu'),
        layers.Dense(10),                       # no activation: emits 10 raw logits
    ]
)

# compile() configures how the model is trained and scored.
SGD_model.compile(
    # The sparse variant takes integer class labels; plain
    # CategoricalCrossentropy would need one-hot encoded labels.
    # from_logits=True because the last Dense layer applies no softmax.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.SGD(learning_rate=0.001, momentum=0.05, nesterov=False),
    metrics=['accuracy'],
)

# Train, print a visual separator, then score on the test split
# (the last expression is displayed as the cell's [loss, accuracy] result).
SGD_model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
print('\n')
SGD_model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 8s - loss: 1.6291 - accuracy: 0.6107
Epoch 2/5
1875/1875 - 7s - loss: 0.7779 - accuracy: 0.8317
Epoch 3/5
1875/1875 - 7s - loss: 0.5357 - accuracy: 0.8715
Epoch 4/5
1875/1875 - 7s - loss: 0.4443 - accuracy: 0.8866
Epoch 5/5
1875/1875 - 7s - loss: 0.3961 - accuracy: 0.8949


313/313 - 1s - loss: 0.3627 - accuracy: 0.9023


[0.3627067506313324, 0.9023000001907349]

In [38]:
#momentum = 0.09        train_accuracy = 0.8944       test_accuracy = 0.9038      nesterov = True
#momentum = 0.05        train_accuracy = 0.8954       test_accuracy = 0.9051      nesterov = True
#momentum = 0.01        train_accuracy = 0.8915       test_accuracy = 0.9014      nesterov = True

#momentum = 0.05        train_accuracy = 0.8949       test_accuracy = 0.9023      nesterov = False

In [39]:
# Same 784 -> 512 -> 256 -> 10 network as the Sequential example, trained with
# centered RMSprop to compare optimizers.
RMSprop_model = keras.Sequential(
    [
        # `shape` must be a tuple. `(28*28)` is just the int 784, so the
        # trailing comma is needed to describe one flattened image.
        keras.Input(shape=(28 * 28,)),
        layers.Dense(512, activation='relu'),   # fully connected layer
        layers.Dense(256, activation='relu'),
        layers.Dense(10),                       # no activation: emits 10 raw logits
    ]
)

# compile() configures how the model is trained and scored.
RMSprop_model.compile(
    # The sparse variant takes integer class labels; plain
    # CategoricalCrossentropy would need one-hot encoded labels.
    # from_logits=True because the last Dense layer applies no softmax.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.RMSprop(
        learning_rate=0.001,
        rho=0.5,
        momentum=0.05,
        epsilon=1e-05,
        centered=True,
    ),
    metrics=['accuracy'],
)

# Train, print a visual separator, then score on the test split
# (the last expression is displayed as the cell's [loss, accuracy] result).
RMSprop_model.fit(x_train, y_train, batch_size=32, epochs=5, verbose=2)
print('\n')
RMSprop_model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/5
1875/1875 - 9s - loss: 0.2043 - accuracy: 0.9379
Epoch 2/5
1875/1875 - 9s - loss: 0.0840 - accuracy: 0.9743
Epoch 3/5
1875/1875 - 9s - loss: 0.0551 - accuracy: 0.9832
Epoch 4/5
1875/1875 - 9s - loss: 0.0405 - accuracy: 0.9872
Epoch 5/5
1875/1875 - 8s - loss: 0.0287 - accuracy: 0.9910


313/313 - 1s - loss: 0.0722 - accuracy: 0.9801


[0.07219903916120529, 0.9800999760627747]