In [4]:
import os
import numpy as np
import matplotlib
matplotlib.use('Agg')           
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import tensorflow as tf
from tensorflow import keras

# Per-sample input shape handed to make_model / keras.Input (height, width, channels);
# the loading cell appends the trailing axis of size 1 with np.expand_dims.
input_shape = (28, 28, 1)
# Number of target classes: width of the one-hot labels and of the final Dense layer.
num_classes = 10
# Fraction of the shuffled training set that is held out as the validation split.
VALIDATION_SPLIT = 0.1
# Mini-batch size passed to Model.fit in the training cells.
BATCH_SIZE = 128

In [36]:
print('\nLoading MNIST')

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Append a trailing channel axis, convert to float32 and divide by 255.
x_train = np.expand_dims(x_train, -1)
x_train = x_train.astype(np.float32) / 255
x_test = np.expand_dims(x_test, -1)
x_test = x_test.astype(np.float32) / 255

# One-hot encode the integer labels into vectors of length num_classes.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

print('\nSpliting data')

# Shuffle with a dedicated, fixed-seed generator so the train/validation
# partition is identical on every "Restart & Run All". The previous
# np.random.permutation call drew from the unseeded global RNG, which made the
# split (and every number reported downstream) change from run to run.
SPLIT_SEED = 0
ind = np.random.default_rng(SPLIT_SEED).permutation(x_train.shape[0])
x_train, y_train = x_train[ind], y_train[ind]

# The first (1 - VALIDATION_SPLIT) share of the shuffled samples is kept for
# training; the remaining tail becomes the validation set.
n = int(x_train.shape[0] * (1-VALIDATION_SPLIT))
x_val = x_train[n:]
y_val = y_train[n:]
x_train = x_train[:n]
y_train = y_train[:n]


Loading MNIST

Spliting data


In [37]:
# Show the array shapes of the training images, then of the validation images.
for split_images in (x_train, x_val):
    print(split_images.shape)

(54000, 28, 28, 1)
(6000, 28, 28, 1)


In [53]:
print('\nConstructing model')
def make_model(input_shape, num_classes, temp=1):
    """Build a small convolutional classifier with a temperature-scaled softmax.

    The network is two Conv2D(3x3, relu, 'same') + MaxPooling2D(2x2) stages
    (32 then 64 filters), followed by Flatten, Dense(128, relu), Dropout(0.25)
    and a final Dense layer producing ``num_classes`` logits. The logits are
    divided by ``temp`` before the softmax activation, so larger temperatures
    give softer output distributions.

    Args:
        input_shape: Shape of one input sample, forwarded to ``keras.Input``.
        num_classes: Number of output units / classes.
        temp: Softmax temperature; must be a positive number. Defaults to 1,
            which leaves the logits unchanged.

    Returns:
        An uncompiled ``keras.Model`` mapping inputs to class probabilities.

    Raises:
        ValueError: If ``temp`` is not strictly positive.
    """
    # Dividing the logits by zero yields inf/NaN probabilities and a negative
    # temperature inverts the class ranking; reject both up front instead of
    # silently training a broken model. `not temp > 0` also catches NaN.
    if not temp > 0:
        raise ValueError('temp must be a positive number, got {!r}'.format(temp))

    inputs = keras.Input(shape = input_shape)
    x = keras.layers.Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(inputs)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(x)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = keras.layers.Flatten()(x)
    x = keras.layers.Dense(units=128, activation='relu')(x)
    x = keras.layers.Dropout(0.25)(x)
    outputs = keras.layers.Dense(units=num_classes)(x)
    # Temperature scaling is applied to the raw logits, before the softmax.
    outputs = outputs / temp
    outputs = keras.layers.Activation('softmax')(outputs)
    return keras.Model(inputs, outputs)


Constructing model


In [59]:
# First network: built with softmax temperature 20 and fitted on the one-hot
# training labels, with the held-out split used for validation.
model1 = make_model(input_shape, num_classes, temp=20)
model1.compile(
    loss=keras.losses.CategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(1e-3),
    metrics=['accuracy'],
)
print('\nTraining model')
# The fit call stays the cell's last expression, so its History object is displayed.
model1.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=BATCH_SIZE,
    validation_data=(x_val, y_val),
)


Training model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fd028cc0cd0>

In [61]:
# Soft targets: model1's softmax outputs (temperature 20) on the training images.
y_hat = model1.predict(x_train)

# Second network with the same architecture and temperature, fitted on those
# soft targets instead of the one-hot labels (a distillation-style setup);
# validation still uses the one-hot labels of the held-out split.
model2 = make_model(input_shape, num_classes, temp=20)
model2.compile(
    loss=keras.losses.CategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(1e-3),
    metrics=['accuracy'],
)
print('\nTraining model')
# The fit call stays the cell's last expression, so its History object is displayed.
model2.fit(
    x_train,
    y_hat,
    epochs=5,
    batch_size=BATCH_SIZE,
    validation_data=(x_val, y_val),
)


Training model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fd02892a850>