## Flatness vs. Generalization - Part 2

In [1]:
import numpy as np
from matplotlib import pyplot as plt
from keras.datasets import mnist
from keras.layers import *
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### Data Preprocessing

In [2]:
# Fetch the MNIST train/test splits
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Normalize both image sets by dividing by 255
# (assumes pixel values are 8-bit intensities in 0..255 -- TODO confirm)
x_train, x_test = x_train / 255, x_test / 255
# Convert the integer class labels of both splits to 10-way one-hot vectors
y_train, y_test = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

### Build Model

In [3]:
def build_model(print_summary=False):
    '''
    Assemble the small fully-connected classifier used throughout this notebook.

    The network flattens a (28, 28) input, applies two 16-unit ReLU dense
    layers and finishes with a 10-unit softmax layer. The model is returned
    uncompiled; callers compile it themselves.

    Args:
        print_summary: bool, print the layer summary when True, default: False
    Returns:
        model: keras Sequential model
    '''

    # Layers in the order they are stacked; the names show up in model.summary()
    layer_stack = [
        Flatten(input_shape=(28, 28), name="input"),
        Dense(16, activation="relu", name="fc1"),
        Dense(16, activation="relu", name="fc2"),
        Dense(10, activation="softmax", name="output"),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    if print_summary:
        model.summary()

    return model

### Train Model

#### Batch size=8

In [4]:
# Batch size 8: train on the first 3000 training samples and validate on the
# first 1000 test samples.
model8 = build_model(print_summary=True)
model8.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
history8 = model8.fit(
    x_train[:3000],
    y_train[:3000],
    batch_size=8,
    epochs=100,
    validation_data=(x_test[:1000], y_test[:1000]),
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (Flatten)              (None, 784)               0         
_________________________________________________________________
fc1 (Dense)                  (None, 16)                12560     
_________________________________________________________________
fc2 (Dense)                  (None, 16)                272       
_________________________________________________________________
output (Dense)               (None, 10)                170       
Total params: 13,002
Trainable params: 13,002
Non-trainable params: 0
_________________________________________________________________
Train on 3000 samples, validate on 1000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/1

Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


#### Batch size=64

In [None]:
# Batch size 64: same data subset, optimizer and epoch count as the
# batch-size-8 run, so that batch size is the only variable being compared.
model64 = build_model(True)
model64.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
history64 = model64.fit(
    x_train[:3000],
    y_train[:3000],
    batch_size=64,  # was 8 (copy-paste slip); must match this section's batch size
    epochs=100,
    validation_data=(x_test[:1000], y_test[:1000]),
)

#### Batch size=128

In [None]:
# Batch size 128: same data subset, optimizer and epoch count as the
# batch-size-8 run, so that batch size is the only variable being compared.
model128 = build_model(True)
model128.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
history128 = model128.fit(
    x_train[:3000],
    y_train[:3000],
    batch_size=128,  # was 8 (copy-paste slip); must match this section's batch size
    epochs=100,
    validation_data=(x_test[:1000], y_test[:1000]),
)

#### Batch size=512

In [None]:
# Batch size 512: same data subset, optimizer and epoch count as the
# batch-size-8 run, so that batch size is the only variable being compared.
model512 = build_model(True)
model512.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
history512 = model512.fit(
    x_train[:3000],
    y_train[:3000],
    batch_size=512,  # was 8 (copy-paste slip); must match this section's batch size
    epochs=100,
    validation_data=(x_test[:1000], y_test[:1000]),
)

#### Batch size=1024

In [None]:
# Batch size 1024: same data subset, optimizer and epoch count as the
# batch-size-8 run, so that batch size is the only variable being compared.
model1024 = build_model(True)
model1024.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
history1024 = model1024.fit(
    x_train[:3000],
    y_train[:3000],
    batch_size=1024,  # was 8 (copy-paste slip); must match this section's batch size
    epochs=100,
    validation_data=(x_test[:1000], y_test[:1000]),
)

### Calculate Sensitivity

In [None]:
def cal_sensitivity(model, x, y):
    '''
    Compute the 2-norm of the gradient of the model's total loss with respect
    to its first input, evaluated on (x, y).

    Args:
        model: keras model; presumably must already be compiled so that
            total_loss / targets / sample_weights exist -- verify against caller
        x: input data batch fed to model.inputs[0]
        y: labels fed to model.targets[0]
    Returns:
        Square root of the sum of squared gradient entries (a scalar)
    '''
    # Symbolic tensors the backend function is fed with, in feed order
    feed_tensors = [
        model.inputs[0],           # input
        model.sample_weights[0],   # sample weights
        model.targets[0],          # labels
        K.learning_phase(),        # train or test mode
    ]
    # Symbolic gradient of the loss w.r.t. the input tensor
    loss_grad = K.gradients(model.total_loss, model.inputs[0])
    grad_fn = K.function(inputs=feed_tensors, outputs=loss_grad)

    # Concrete values matching feed_tensors one-to-one
    feed_values = [
        x,                          # X input data
        np.ones((x.shape[0],)),     # uniform weight of 1 for every sample
        y,                          # y labels
        0,                          # learning phase in TEST mode
    ]
    grads = grad_fn(feed_values)

    # 2-norm over every entry of the input gradient
    return np.sqrt(np.sum(np.square(grads[0])))

In [None]:
# Evaluate sensitivity on the first training sample, reshaped into a batch of one
first_image = x_train[0].reshape(1, 28, 28)
first_label = y_train[0].reshape(1, 10)
sensitivity = [cal_sensitivity(model8, first_image, first_label)]