In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # ignore information messages from tensorflow

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [2]:
# use GPU
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [3]:
# loading dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)
print(y_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
(60000, 28, 28)
(60000,)


In [4]:
# flatten and normalize
x_train = x_train.reshape(-1, 28*28).astype("float32") / 255.0
x_test = x_test.reshape(-1, 28*28).astype("float32") / 255.0

# x_train = tf.convert_to_tensor(x_train)

In [5]:
# create a basic neural network

# using sequential API (very convenient, not very flexible)
# It only allows us to map one input into one output
model = keras.Sequential(
    [
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    optimizer = keras.optimizers.Adam(lr=0.001),
    metrics = ["accuracy"],
)

model.fit(x_train, y_train, batch_size = 32, epochs = 5, verbose = 2)
model.evaluate(x_test, y_test, batch_size = 32, verbose = 2)



Epoch 1/5
1875/1875 - 6s - loss: 0.1850 - accuracy: 0.9441 - 6s/epoch - 3ms/step
Epoch 2/5
1875/1875 - 5s - loss: 0.0791 - accuracy: 0.9751 - 5s/epoch - 2ms/step
Epoch 3/5
1875/1875 - 4s - loss: 0.0559 - accuracy: 0.9820 - 4s/epoch - 2ms/step
Epoch 4/5
1875/1875 - 4s - loss: 0.0391 - accuracy: 0.9876 - 4s/epoch - 2ms/step
Epoch 5/5
1875/1875 - 5s - loss: 0.0335 - accuracy: 0.9896 - 5s/epoch - 2ms/step
313/313 - 1s - loss: 0.0825 - accuracy: 0.9776 - 704ms/epoch - 2ms/step


[0.08254842460155487, 0.9775999784469604]

In [6]:
# create a basic neural network

# using sequential API (very convenient, not very flexible)
# It only allows us to map one input into one output
model = keras.Sequential(
    [
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    optimizer = keras.optimizers.Adam(lr=0.001),
    metrics = ["accuracy"],
)

model.fit(x_train, y_train, batch_size = 32, epochs = 15, verbose = 2)
model.evaluate(x_test, y_test, batch_size = 32, verbose = 2)



Epoch 1/15
1875/1875 - 5s - loss: 0.1868 - accuracy: 0.9438 - 5s/epoch - 3ms/step
Epoch 2/15
1875/1875 - 5s - loss: 0.0788 - accuracy: 0.9758 - 5s/epoch - 3ms/step
Epoch 3/15
1875/1875 - 4s - loss: 0.0546 - accuracy: 0.9825 - 4s/epoch - 2ms/step
Epoch 4/15
1875/1875 - 4s - loss: 0.0429 - accuracy: 0.9867 - 4s/epoch - 2ms/step
Epoch 5/15
1875/1875 - 5s - loss: 0.0325 - accuracy: 0.9897 - 5s/epoch - 3ms/step
Epoch 6/15
1875/1875 - 4s - loss: 0.0281 - accuracy: 0.9913 - 4s/epoch - 2ms/step
Epoch 7/15
1875/1875 - 4s - loss: 0.0248 - accuracy: 0.9919 - 4s/epoch - 2ms/step
Epoch 8/15
1875/1875 - 5s - loss: 0.0209 - accuracy: 0.9931 - 5s/epoch - 3ms/step
Epoch 9/15
1875/1875 - 4s - loss: 0.0197 - accuracy: 0.9937 - 4s/epoch - 2ms/step
Epoch 10/15
1875/1875 - 4s - loss: 0.0158 - accuracy: 0.9947 - 4s/epoch - 2ms/step
Epoch 11/15
1875/1875 - 5s - loss: 0.0165 - accuracy: 0.9950 - 5s/epoch - 2ms/step
Epoch 12/15
1875/1875 - 4s - loss: 0.0143 - accuracy: 0.9959 - 4s/epoch - 2ms/step
Epoch 13/15
1

[0.09232733398675919, 0.982699990272522]

In [7]:
# create a basic neural network

# using sequential API (very convenient, not very flexible)
# It only allows us to map one input into one output
model = keras.Sequential(
    [
        keras.Input(shape=(28*28)), #specify the input
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

print(model.summary()) # print the model summary

# exit the code with sys.exit

import sys
sys.exit()


model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    optimizer = keras.optimizers.Adam(lr=0.001),
    metrics = ["accuracy"],
)

model.fit(x_train, y_train, batch_size = 32, epochs = 15, verbose = 2)
model.evaluate(x_test, y_test, batch_size = 32, verbose = 2)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 512)               401920    
                                                                 
 dense_7 (Dense)             (None, 256)               131328    
                                                                 
 dense_8 (Dense)             (None, 10)                2570      
                                                                 
Total params: 535818 (2.04 MB)
Trainable params: 535818 (2.04 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [9]:
# create a basic neural network

# using sequential API (very convenient, not very flexible)
# It only allows us to map one input into one output
model = keras.Sequential(
    [
        keras.Input(shape=(28*28)), #specify the input
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

# manual intervention
model = keras.Sequential()
model.add(keras.Input(shape=(754)))
model.add(layers.Dense(512, activation='relu'))
print(model.summary()) # printing summary after 1 layer
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(10))
print(model.summary()) # prinitng summary after 2 layers



Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 512)               386560    
                                                                 
Total params: 386560 (1.47 MB)
Trainable params: 386560 (1.47 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 512)               386560    
                                                                 
 dense_19 (Dense)            (None, 256)               131328    
                                                                 
 dense_20 (Dense)            (None, 10)                2570      
                                                         

In [14]:
# create a basic neural network

# using sequential API (very convenient, not very flexible)
# It only allows us to map one input into one output
model = keras.Sequential(
    [
        keras.Input(shape=(28*28)), #specify the input
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

# manual intervention
model = keras.Sequential()
model.add(keras.Input(shape=(784)))
model.add(layers.Dense(512, activation='relu'))
print(model.summary()) # printing summary after 1 layer
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(10))
print(model.summary()) # prinitng summary after 2 layers


print('Functional API')
# Functional API (A bit more flexible)
inputs = keras.Input(shape=(784))
x = layers.Dense(512, activation='relu')(inputs)
x = layers.Dense(256, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
moodel = keras.Model(inputs=inputs, outputs=outputs)

print(model.summary())


model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits = False),
    optimizer = keras.optimizers.Adam(lr=0.001),
    metrics = ["accuracy"],
)

model.fit(x_train, y_train, batch_size = 32, epochs = 5, verbose = 2)
model.evaluate(x_test, y_test, batch_size = 32, verbose = 2)

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_60 (Dense)            (None, 512)               401920    
                                                                 
Total params: 401920 (1.53 MB)
Trainable params: 401920 (1.53 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_60 (Dense)            (None, 512)               401920    
                                                                 
 dense_61 (Dense)            (None, 256)               131328    
                                                                 
 dense_62 (Dense)            (None, 10)                2570      
                                                       



None
Epoch 1/5
1875/1875 - 8s - loss: 2.5920 - accuracy: 0.1510 - 8s/epoch - 4ms/step
Epoch 2/5
1875/1875 - 4s - loss: 2.3023 - accuracy: 0.1076 - 4s/epoch - 2ms/step
Epoch 3/5
1875/1875 - 5s - loss: 2.3023 - accuracy: 0.1076 - 5s/epoch - 3ms/step
Epoch 4/5
1875/1875 - 4s - loss: 2.3023 - accuracy: 0.1076 - 4s/epoch - 2ms/step
Epoch 5/5
1875/1875 - 4s - loss: 2.3022 - accuracy: 0.1077 - 4s/epoch - 2ms/step
313/313 - 1s - loss: 2.3019 - accuracy: 0.1072 - 627ms/epoch - 2ms/step


[2.3019001483917236, 0.10719999670982361]

In [16]:
# We can name the specific layers
# create a basic neural network

# using sequential API (very convenient, not very flexible)
# It only allows us to map one input into one output
model = keras.Sequential(
    [
        keras.Input(shape=(28*28)), #specify the input
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

# manual intervention
model = keras.Sequential()
model.add(keras.Input(shape=(784)))
model.add(layers.Dense(512, activation='relu'))
print(model.summary()) # printing summary after 1 layer
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(10))
print(model.summary()) # prinitng summary after 2 layers


print('Functional API')
# Functional API (A bit more flexible)
inputs = keras.Input(shape=(784))
x = layers.Dense(512, activation='relu', name='first_layer')(inputs)
x = layers.Dense(256, activation='relu', name='second_layer')(x)
outputs = layers.Dense(10, activation='softmax')(x)
moodel = keras.Model(inputs=inputs, outputs=outputs)

print(model.summary())


model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits = False),
    optimizer = keras.optimizers.Adam(learning_rate=0.001),
    metrics = ["accuracy"],
)

model.fit(x_train, y_train, batch_size = 32, epochs = 5, verbose = 2)
model.evaluate(x_test, y_test, batch_size = 32, verbose = 2)

Model: "sequential_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_76 (Dense)            (None, 512)               401920    
                                                                 
Total params: 401920 (1.53 MB)
Trainable params: 401920 (1.53 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Model: "sequential_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_76 (Dense)            (None, 512)               401920    
                                                                 
 dense_77 (Dense)            (None, 256)               131328    
                                                                 
 dense_78 (Dense)            (None, 10)                2570      
                                                       

[2.30259108543396, 0.15680000185966492]

In [18]:
# create a basic neural network

# using sequential API (very convenient, not very flexible)
# It only allows us to map one input into one output
model = keras.Sequential(
    [
        keras.Input(shape=(28*28)), #specify the input
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

# manual intervention = Sequential API
model = keras.Sequential()
model.add(keras.Input(shape=(784)))
model.add(layers.Dense(512, activation='relu'))
print(model.summary()) # printing summary after 1 layer
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(10))
print(model.summary()) # prinitng summary after 2 layers

model = keras.Model(inputs=model.inputs,
                    outputs=[model.layers[-2].output])
feature = model.predict(x_train)
print(feature.shape)




import sys
sys.exit()

print('Functional API')
# Functional API (A bit more flexible)
inputs = keras.Input(shape=(784))
x = layers.Dense(512, activation='relu')(inputs)
x = layers.Dense(256, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
moodel = keras.Model(inputs=inputs, outputs=outputs)

print(model.summary())


model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits = False),
    optimizer = keras.optimizers.Adam(lr=0.001),
    metrics = ["accuracy"],
)

model.fit(x_train, y_train, batch_size = 32, epochs = 5, verbose = 2)
model.evaluate(x_test, y_test, batch_size = 32, verbose = 2)

Model: "sequential_24"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_89 (Dense)            (None, 512)               401920    
                                                                 
Total params: 401920 (1.53 MB)
Trainable params: 401920 (1.53 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Model: "sequential_24"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_89 (Dense)            (None, 512)               401920    
                                                                 
 dense_90 (Dense)            (None, 256)               131328    
                                                                 
 dense_91 (Dense)            (None, 10)                2570      
                                                       

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [19]:
# create a basic neural network

# using sequential API (very convenient, not very flexible)
# It only allows us to map one input into one output
model = keras.Sequential(
    [
        keras.Input(shape=(28*28)), #specify the input
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

# manual intervention = Sequential API
model = keras.Sequential()
model.add(keras.Input(shape=(784)))
model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(256, activation='relu', name='my_layer'))
model.add(layers.Dense(10))

model = keras.Model(inputs=model.inputs,
                    outputs=[model.get_layer('my_layer').output])
feature = model.predict(x_train)
print(feature.shape)




import sys
sys.exit()

print('Functional API')
# Functional API (A bit more flexible)
inputs = keras.Input(shape=(784))
x = layers.Dense(512, activation='relu')(inputs)
x = layers.Dense(256, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
moodel = keras.Model(inputs=inputs, outputs=outputs)

print(model.summary())


model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits = False),
    optimizer = keras.optimizers.Adam(lr=0.001),
    metrics = ["accuracy"],
)

model.fit(x_train, y_train, batch_size = 32, epochs = 5, verbose = 2)
model.evaluate(x_test, y_test, batch_size = 32, verbose = 2)

(60000, 256)


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [20]:
# create a basic neural network

# using sequential API (very convenient, not very flexible)
# It only allows us to map one input into one output
model = keras.Sequential(
    [
        keras.Input(shape=(28*28)), #specify the input
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(10),
    ]
)

# manual intervention = Sequential API
model = keras.Sequential()
model.add(keras.Input(shape=(784)))
model.add(layers.Dense(512, activation='relu'))

model.add(layers.Dense(256, activation='relu', name='my_layer'))
model.add(layers.Dense(10))

model = keras.Model(inputs=model.inputs,
                    outputs=[layer.output for layer in model.layers])
features = model.predict(x_train)

for feature in features:
  print(feature.shape)


import sys
sys.exit()

print('Functional API')
# Functional API (A bit more flexible)
inputs = keras.Input(shape=(784))
x = layers.Dense(512, activation='relu')(inputs)
x = layers.Dense(256, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
moodel = keras.Model(inputs=inputs, outputs=outputs)

print(model.summary())


model.compile(
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits = False),
    optimizer = keras.optimizers.Adam(lr=0.001),
    metrics = ["accuracy"],
)

model.fit(x_train, y_train, batch_size = 32, epochs = 5, verbose = 2)
model.evaluate(x_test, y_test, batch_size = 32, verbose = 2)

(60000, 512)
(60000, 256)
(60000, 10)


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


## Suggestions

1. Try and see what accuracy you can get by increasing the model, training for longer, etcetera. You should be able to get over 98.2% on the test set!
2. Try using different optimizers than Adam, for example Gradient Descent with Momentum, Adagrad, and RMSprop.
3. Is there any difference if you remove the normalization of the data?