In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import tensorflow as tf
import numpy as np

# Set the seed for random operations. 
# This let our experiments to be reproducible. 
tf.random.set_seed(1234)  

# Example: Fashion MNIST - Multi-class classification
## Dataset

In [3]:
# Load built-in dataset
# ---------------------
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

In [4]:
x_train.shape
y_train.shape

(60000, 28, 28)

(60000,)

In [5]:
# Split in training and validation sets
# e.g., 50000 samples for training and 10000 samples for validation

x_valid = x_train[50000:, ...]  #### Modified to have a faster evaluation
y_valid = y_train[50000:, ...]  #### Modified to have a faster evaluation

x_train = x_train[:50000, ...]
y_train = y_train[:50000, ...]

In [6]:
# Create Training Dataset object
# ------------------------------
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))

# Shuffle
train_dataset = train_dataset.shuffle(buffer_size=x_train.shape[0])

# Normalize images
def normalize_img(x_, y_):
    return tf.cast(x_, tf.float32) / 255., y_

train_dataset = train_dataset.map(normalize_img)

# 1-hot encoding <- for categorical cross entropy
def to_categorical(x_, y_):
    return x_, tf.one_hot(y_, depth=10)

train_dataset = train_dataset.map(to_categorical)

# Divide in batches
bs = 32
train_dataset = train_dataset.batch(bs)

# Repeat
# Without calling the repeat function the dataset 
# will be empty after consuming all the images
train_dataset = train_dataset.repeat()

In [7]:
# Create Validation Dataset  
# -----------------------
valid_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))

# Normalize images
valid_dataset = valid_dataset.map(normalize_img)

# 1-hot encoding
valid_dataset = valid_dataset.map(to_categorical)

# Divide in batches
valid_dataset = valid_dataset.batch(bs)

# Repeat
valid_dataset = valid_dataset.repeat()

In [8]:
# Create Test Dataset
# -------------------
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))

test_dataset = test_dataset.map(normalize_img)

test_dataset = test_dataset.map(to_categorical)

test_dataset = test_dataset.batch(1)

## Model

In [9]:
# Fashion MNIST classification
# ----------------------------

# x: 28x28
# y: 10 classes

# Create Model
# ------------
which_model = 'base'

if which_model == 'base':
    # Create base model (e.g., Input -> Hidden -> Out)
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(28, 28))) # or as a list
    model.add(tf.keras.layers.Dense(units=1000, activation=tf.keras.activations.sigmoid))
    model.add(tf.keras.layers.Dense(units=10, activation=tf.keras.activations.softmax))
elif which_model == 'base_dropout':
    # Create model with Dropout layer
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(28, 28))) # or as a list
    model.add(tf.keras.layers.Dense(units=1000, activation=tf.keras.activations.sigmoid))
    model.add(tf.keras.layers.Dropout(0.3))
    model.add(tf.keras.layers.Dense(units=10, activation=tf.keras.activations.softmax))
elif which_model == 'base_weight_decay':
    # Create model with weights penalty (L2 regularization)
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(28, 28))) # or as a list
    model.add(tf.keras.layers.Dense(units=1000, 
                                    activation=tf.keras.activations.sigmoid,
                                    kernel_regularizer=tf.keras.regularizers.l2(0.0001)))
    model.add(tf.keras.layers.Dense(units=10, 
                                    activation=tf.keras.activations.softmax,
                                    kernel_regularizer=tf.keras.regularizers.l2(0.0001)))
    

In [10]:
# Visualize created model as a table
model.summary()

# Visualize initialized weights
model.weights

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 1000)              785000    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                10010     
Total params: 795,010
Trainable params: 795,010
Non-trainable params: 0
_________________________________________________________________


[<tf.Variable 'dense/kernel:0' shape=(784, 1000) dtype=float32, numpy=
 array([[-0.02025537, -0.04190592,  0.01717176, ...,  0.04665055,
         -0.00734134,  0.02484443],
        [ 0.02537891,  0.03631793, -0.05004032, ...,  0.03651362,
          0.04028282,  0.01237207],
        [-0.02591686,  0.05758444,  0.00186032, ..., -0.0287274 ,
          0.01839119, -0.04242979],
        ...,
        [-0.03051765,  0.00858321,  0.02899709, ...,  0.02249118,
         -0.00020791,  0.01880491],
        [-0.03774619,  0.04266834, -0.01642644, ...,  0.03594344,
         -0.02973929,  0.05186799],
        [ 0.03166064,  0.00914184, -0.02561587, ...,  0.05204986,
          0.01391875,  0.02314176]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(1000,) dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0

## Prepare the model for training

In [11]:
# Optimization params
# -------------------

# Loss
loss = tf.keras.losses.CategoricalCrossentropy()

# learning rate
lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# -------------------

# Validation metrics
# ------------------

metrics = ['accuracy']
# ------------------

# Compile Model
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

## Training with callbacks

In [12]:
import os
from datetime import datetime

cwd = os.getcwd()

exps_dir = os.path.join(cwd, 'overfitting_experiments')
if not os.path.exists(exps_dir):
    os.makedirs(exps_dir)

now = datetime.now().strftime('%b%d_%H-%M-%S')

exp_name = which_model

exp_dir = os.path.join(exps_dir, exp_name + '_' + str(now))
if not os.path.exists(exp_dir):
    os.makedirs(exp_dir)
    
callbacks = []

# Model checkpoint
# ----------------
ckpt_dir = os.path.join(exp_dir, 'ckpts')
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)

ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp_{epoch:02d}.ckpt'), 
                                                   save_weights_only=True)  # False to save the model directly
callbacks.append(ckpt_callback)

# ----------------

# Visualize Learning on Tensorboard
# ---------------------------------
tb_dir = os.path.join(exp_dir, 'tb_logs')
if not os.path.exists(tb_dir):
    os.makedirs(tb_dir)
    
# By default shows losses and metrics for both training and validation
tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                             profile_batch=0,
                                             histogram_freq=1)  # if 1 shows weights histograms
callbacks.append(tb_callback)

# Early Stopping
# --------------
early_stop = False
if early_stop:
    es_callback = tf.keras.callback.EarlyStopping(monitor='val_loss', patience=10)
    callbacks.append(es_callback)

# ---------------------------------

model.fit(x=train_dataset,
          epochs=2,  #### set repeat in training dataset
          steps_per_epoch=int(np.ceil(x_train.shape[0] / bs)),
          validation_data=valid_dataset,
          validation_steps=int(np.ceil(x_valid.shape[0] / bs)), 
          callbacks=callbacks)

# How to visualize Tensorboard

# 1. tensorboard --logdir EXPERIMENTS_DIR --port PORT     <- from terminal
# 2. localhost:PORT   <- in your browser

Train for 1563 steps, validate for 313 steps
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x159749650>

## Test model

In [13]:
# Let's try a different way to give data to model 
# using directly the NumPy arrays

# model.load_weights('/path/to/checkpoint')  # use this if you want to restore saved model

eval_out = model.evaluate(x=x_test / 255.,
                          y=tf.keras.utils.to_categorical(y_test),
                          verbose=0)

eval_out

[0.4091281944036484, 0.8531]

## Compute prediction

In [17]:
# Compute output given x
import Image
shoe_img = Image.open('shoe.png').convert('L')

shoe_arr = np.expand_dims(np.array(shoe_img), 0)

out_softmax = model.predict(x=shoe_arr / 255.)

out_softmax  # is already a probability distribution (softmax)

# Get predicted class as the index corresponding to the maximum value in the vector probability
predicted_class = tf.argmax(out_softmax, 1)
predicted_class

ModuleNotFoundError: No module named 'Image'