In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist

In [2]:
# Fetch MNIST (downloaded on first use) as a (train, test) pair of (images, labels) tuples.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Sanity-check the array shapes of each split: images first, then labels.
for images, labels in ((x_train, y_train), (x_test, y_test)):
    print(images.shape, labels.shape)

(60000, 28, 28) (60000,)
(10000, 28, 28) (10000,)


In [4]:
# Fully connected classifier for 28x28 digit images.
# ReLU is used in the hidden layers instead of sigmoid/tanh; background:
# https://machinelearningmastery.com/rectified-linear-activation-function-for-deep-learning-neural-networks/
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))      # (1) 28x28 image -> flat vector
model.add(keras.layers.Dense(512, activation='relu'))      # (2) first hidden layer
model.add(keras.layers.Dense(512, activation='relu'))      # (3) second hidden layer
model.add(keras.layers.Dense(10, activation='softmax'))    # (4) one probability per class

In [5]:
# The integer class labels are used as-is: sparse categorical cross-entropy
# takes class indices directly, so no one-hot conversion of y is needed.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [6]:
# Baseline run: train for 10 epochs on the pixel values exactly as loaded (no rescaling yet).
model.fit(x=x_train, y=y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7d4030356bf0>

## Data Normalization

In [7]:
# Pixel values lie in 0..255 while the weights are small (roughly -1..1), so the
# raw inputs produce very large products and make learning difficult.
# Rescale both splits to the 0..1 range and store them as float32.
# NOTE: this rebinds x_train / x_test, so re-running the cell without reloading
# the data would divide by 255 a second time.
# (numpy is imported once in the import cell at the top of the notebook.)
x_train = (x_train / 255.0).astype(np.float32)
x_test  = (x_test  / 255.0).astype(np.float32)

In [8]:
# Rebuild the same network from scratch and train it on the 0..1 scaled images.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))
model.add(keras.layers.Dense(512, activation='relu'))
model.add(keras.layers.Dense(512, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))

# Same loss / optimizer / metric as the baseline so the runs are comparable.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x=x_train, y=y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7d40293158d0>

In [9]:
# Score the current model on the held-out test split; the result is
# [loss, accuracy], matching the loss and metric passed to compile().
model.evaluate(x=x_test, y=y_test)



[0.08852319419384003, 0.9817000031471252]

## Standardization

In [10]:
# Standardization: if the data is roughly normally distributed, it is easier to
# work with after shifting it to zero mean and scaling it to unit variance.
# Reload the datasets so that standardization starts from the raw pixel values.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Compute the statistics on the training split only.
mean = np.mean(x_train)
std = np.std(x_train)
x_train = ((x_train - mean) / std).astype(np.float32)
# Apply the same transform (training mean/std) to the test split; otherwise any
# later evaluation would feed raw 0..255 pixels to a model trained on
# standardized inputs.
x_test = ((x_test - mean) / std).astype(np.float32)

In [11]:
# Rebuild the same network from scratch and train it on the standardized images.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))
model.add(keras.layers.Dense(512, activation='relu'))
model.add(keras.layers.Dense(512, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))

# Same loss / optimizer / metric as the earlier runs so the results are comparable.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(x=x_train, y=y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7d4029d0b880>

In [None]:
# Putting the input values on a similar scale (i.e. standardization) improves the model's performance