In [1]:
import numpy as np

from keras.datasets import mnist

from keras.models import Sequential, Model
from keras.layers import Flatten, Dense, Activation, Dropout
from keras.utils import to_categorical

from tqdm import tqdm_notebook

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train =  x_train / 255.0
x_test = x_test / 255.0
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [3]:
x_train.shape

(60000, 28, 28)

In [4]:
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

In [5]:
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

In [6]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
_________________________________________________________________
activation_1 (Activation)    (None, 10)                0         
Total para

In [7]:
model.fit(x_train, y_train, 
          batch_size=64, epochs=10,
          validation_data=(x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1812022550>

In [8]:
model.evaluate(x_test, y_test, verbose=0)

[0.073244131956612363, 0.98150000000000004]

In [9]:
tmp_model = Model(model.inputs, model.layers[-2].output)

In [10]:
def softmax(x, t=1.0):
    x_exp = np.exp(x / t)
    return x_exp / x_exp.sum(axis=1, keepdims=True)

In [11]:
result = {}
for t in tqdm_notebook([1, 4, 16, 32, 128]):
    probs_train = softmax(tmp_model.predict(x_train), t=10)
    for k in tqdm_notebook(range(2, 8), leave=False):
        new_model = Sequential()
        new_model.add(Flatten(input_shape=(28, 28)))
        new_model.add(Dense(512 // 2 ** k, activation='relu'))
        new_model.add(Dropout(0.2))
        new_model.add(Dense(256 // 2 ** k, activation='relu'))
        new_model.add(Dropout(0.2))
        new_model.add(Dense(10))
        new_model.add(Activation('softmax'))

        new_model.compile('adam', 'kullback_leibler_divergence', metrics=['accuracy'])

        new_model.fit(x_train, probs_train, 
                      batch_size=64, epochs=10,
                      validation_data=(x_test, y_test), verbose=0)
        
        result[(t, k)] = new_model.evaluate(x_test, y_test, verbose=0)
        print(result[(t, k)])

[0.67200587158203129, 0.97050000000000003]
[0.70584797477722172, 0.96460000000000001]
[0.79189337244033808, 0.94350000000000001]
[0.97945746726989746, 0.92130000000000001]
[1.3768260456085204, 0.85880000000000001]
[1.7983518890380858, 0.38290000000000002]


[0.67589050464630129, 0.97199999999999998]
[0.69359360065460207, 0.96360000000000001]
[0.78883612651824953, 0.94579999999999997]
[0.96407746601104738, 0.91879999999999995]
[1.402759627532959, 0.83630000000000004]
[2.066173355102539, 0.26989999999999997]


[0.67629583654403691, 0.97250000000000003]
[0.71194907989501954, 0.9627]
[0.79780834226608277, 0.94669999999999999]
[0.99408421936035152, 0.91149999999999998]
[1.4256680187225341, 0.65920000000000001]
[1.7858027519226074, 0.41089999999999999]


[0.65960978164672857, 0.97150000000000003]
[0.69795953760147089, 0.96150000000000002]
[0.78585443038940428, 0.95069999999999999]
[0.99233591518402098, 0.91969999999999996]
[1.5544725862503053, 0.6462]
[1.9899202474594115, 0.3286]


[0.6680708203315735, 0.97019999999999995]
[0.70555324840545652, 0.96289999999999998]
[0.77859001035690312, 0.94389999999999996]
[0.95416801290512088, 0.9173]
[1.4094960582733154, 0.81699999999999995]
[1.779546321105957, 0.51549999999999996]



In [14]:
result_baseline = {}

for k in tqdm_notebook(range(2, 8), leave=False):
    new_model = Sequential()
    new_model.add(Flatten(input_shape=(28, 28)))
    new_model.add(Dense(512 // 2 ** k, activation='relu'))
    new_model.add(Dropout(0.2))
    new_model.add(Dense(256 // 2 ** k, activation='relu'))
    new_model.add(Dropout(0.2))
    new_model.add(Dense(10))
    new_model.add(Activation('softmax'))

    new_model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])

    new_model.fit(x_train, y_train, 
                  batch_size=64, epochs=10,
                  validation_data=(x_test, y_test), verbose=0)
        
    result[(t, k)] = new_model.evaluate(x_test, y_test, verbose=0)
    print(result[(t, k)])

[0.071028074077607015, 0.97940000000000005]
[0.10000545971703249, 0.9718]
[0.151826762181893, 0.95789999999999997]
[0.30684859530925751, 0.91910000000000003]
[0.78056067991256717, 0.84509999999999996]
[1.3253533149719239, 0.58520000000000005]
