# Physics 494/594
## Convolutional Networks: Implementation in TensorFlow

In [None]:
# %load ./include/header.py
import numpy as np
import matplotlib.pyplot as plt
import sys
from tqdm import trange,tqdm
sys.path.append('./include')
import ml4s

# draw figures inline in the notebook, using the SVG figure format
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# plot style sheet and notebook CSS are read from the local ./include directory
plt.style.use('./include/notebook.mplstyle')
np.set_printoptions(linewidth=120)
ml4s.set_css_style('./include/bootstrap.css')
# colors of the active matplotlib property cycle, indexed by the plots further down
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
π = np.pi

## Last Time

### [Notebook Link: 25_Convolutional_Networks.ipynb](./25_Convolutional_Networks.ipynb)

- Convolutional networks: reducing free parameters and encoding the properties of images 
- Explore a simple example to understand how filters work

## Today

- Convolutional network implementation in TensorFlow
- Re-train on MNIST and explore improved generalizability

### Import tensorflow

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

### Check if we have access to any GPUs for accelerated training

In [None]:
# shell escape: run the `nvidia-smi` command-line tool and show its output in the notebook
!nvidia-smi

In [None]:
# Report the TensorFlow version and every GPU that TensorFlow can see.
print(f'tf = {tf.__version__}')
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    print(gpu)

# Choose the distribution strategy for training: mirror across the visible
# GPU(s) when there are any, otherwise fall back to the default strategy.
if physical_devices:
    strategy = tf.distribute.MirroredStrategy()
else:
    strategy = tf.distribute.get_strategy()

### Implement in `keras` for MNIST

In [None]:
# Load the MNIST digits (images + integer labels) through keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Append a trailing channel axis and rescale the pixel values
# from [0,255] to the [0.0,1.0] range.
x_train = x_train[..., np.newaxis]/255.0
x_test = x_test[..., np.newaxis]/255.0

print("Number of original training examples:", len(x_train))
print("Number of original test examples:", len(x_test))

# Image size comes from the two axes between the sample axis and the channel
# axis; the number of classes is the largest test label plus one.
rows,cols = x_train.shape[1:3]
num_classes = y_test.max()+1

# 1-hot encode the integer labels with the keras helper
y_train_hot = keras.utils.to_categorical(y_train, num_classes)
y_test_hot = keras.utils.to_categorical(y_test, num_classes)

Our MNIST images are greyscale, so we only have 1 channel.  RGB images would have 3.

In [None]:
# Print each array's shape underneath a legend naming its axes.
x_axes_legend = '(batch_size,rows,cols,channels)'
y_axes_legend = '(batch_size,num classes)'

print('              ', x_axes_legend)
print('x_train shape:', x_train.shape, '\n')
print('              ', y_axes_legend)
print('y_train shape:', y_train_hot.shape)

In [None]:
# Simpler single-convolution alternative, kept (disabled) for comparison:
# model = keras.Sequential(
# [
#     layers.Conv2D(input_shape=(rows,cols, 1), kernel_size=(5, 5), filters=7,
#                   activation='relu', padding='same'),
#     layers.MaxPooling2D(pool_size=(4, 4)),
#     layers.Flatten(),
#     layers.Dense(num_classes, activation='softmax')
# ])

# Build AND compile the model inside the scope of the distribution strategy
# selected earlier. Variables created outside `strategy.scope()` are not managed
# by the strategy, so without this the MirroredStrategy choice has no effect.
with strategy.scope():
    # instantiate model: two convolution + max-pooling stages followed by a
    # dense hidden layer and a softmax read-out over the classes
    model = keras.Sequential(
        [
            layers.Conv2D(input_shape=(rows,cols,1), kernel_size=(5, 5), filters=10,
                          activation='relu', padding='same'),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Conv2D(20, (5, 5), activation='relu'),
            layers.Dropout(0.5),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Flatten(),
            layers.Dense(20*4*4, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(num_classes, activation='softmax')
        ])

    # compile: categorical cross-entropy matches the 1-hot encoded labels; the
    # accuracy metric is reported under the name 'categorical_accuracy'
    model.compile(loss=tf.losses.CategoricalCrossentropy(), optimizer='adam',
                  metrics=[tf.metrics.CategoricalAccuracy()])

model.summary()

### Visualize the Model

The architecture diagram below was made with the amazing NN-SVG generator by [Alex Lenail](http://alexlenail.me/NN-SVG/LeNet.html).

In [None]:
# show the SVG file at the given relative path via ipyplot, 800 wide (img_width)
import ipyplot
ipyplot.plot_images(['../data/convolutional_network.svg'],img_width=800)

In [None]:
# training hyper-parameters
batch_size = 64
epochs = 10

# `fit` returns a History object; its `.history` dict holds the per-epoch loss
# and metric values. The test set is passed as validation data, so the `val_*`
# entries of the history are measured on the test images.
training = model.fit(x_train,y_train_hot, batch_size=batch_size, epochs=epochs,
                     verbose=1, validation_data=(x_test,y_test_hot))

In [None]:
# Training history: accuracy in the top panel, loss in the bottom panel, each
# for the training data (solid) and the test data (dashed).

# final [loss, accuracy] of the trained model on the test set
score = model.evaluate(x_test, y_test_hot, verbose=0)

fig,ax = plt.subplots(2,1, sharex=True, figsize=(5,5))
accuracy_ax, loss_ax = ax

history = training.history
train_style = {'color': colors[0]}
test_style = {'ls': '--', 'color': colors[-3]}
centered_top = {'horizontalalignment': 'center', 'verticalalignment': 'top'}

# accuracy
accuracy_ax.plot(history['categorical_accuracy'], **train_style)
accuracy_ax.plot(history['val_categorical_accuracy'], **test_style)
accuracy_ax.set_ylabel('model accuracy')
accuracy_ax.set_ylim(top=1)
accuracy_ax.text(0.5, 0.95, f'{score[1]:.2f}', transform=accuracy_ax.transAxes,
                 **centered_top)
accuracy_ax.legend(['train', 'test'], loc='best')

# loss
loss_ax.plot(history['loss'], **train_style)
loss_ax.plot(history['val_loss'], **test_style)
loss_ax.set_ylabel('model loss')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylim(bottom=0)
loss_ax.text(0.5, 0.95, f'{score[0]:.2f}', transform=loss_ax.transAxes,
             **centered_top)
loss_ax.legend(['train', 'test'], loc='best');

In [None]:
# `score` is the [loss, accuracy] pair returned by model.evaluate on the test set
test_loss, test_accuracy = score
print(f'Test loss:     {test_loss:>5.3f}')
print(f'Test accuracy: {100*test_accuracy:5.2f}%')

### Investigate Predictions

In [None]:
# Class probabilities for every training and test image.
# `model.predict` runs inference batch by batch and returns plain numpy arrays,
# so the whole training set is never pushed through the network in one single
# call (which `model(x_train)` does and which can exhaust memory).
predictions_prob_train = model.predict(x_train, batch_size=512, verbose=0)
predictions_prob_test = model.predict(x_test, batch_size=512, verbose=0)

# predicted label = class with the largest probability
predictions_train = np.argmax(predictions_prob_train,axis=1)
predictions_test = np.argmax(predictions_prob_test,axis=1)

# indices of the images whose predicted label differs from the true label
mistakes_train = np.where(predictions_train != y_train)[0]
mistakes_test = np.where(predictions_test != y_test)[0]

num_mistakes_train,num_mistakes_test = len(mistakes_train),len(mistakes_test)

print(f'Train Mistakes: {100*num_mistakes_train/x_train.shape[0]:.2f}%')
print(f'Test Mistakes : {100*num_mistakes_test/x_test.shape[0]:.2f}%')

In [None]:
def plot_digit_array(x,y, show_prediction=False):
    '''Plot a set of digit images on a roughly square grid.

    Parameters
    ----------
    x : array
        Digit images indexed along the first axis. Every entry is reshaped to
        28x28 before plotting, so it must hold exactly 28*28 values.
    y : sequence
        One label per digit in `x`; only read when `show_prediction` is True.
    show_prediction : bool, optional
        If True, write the label `y[i]` in red in the top-right corner of the
        i-th image.

    Returns
    -------
    None. A new matplotlib figure is created as a side effect; nothing is
    created when `x` is empty.
    '''

    num_digits = x.shape[0]

    # nothing to draw (e.g. no mistakes were made); also avoids dividing by a
    # zero column count below
    if num_digits == 0:
        return

    # near-square grid: floor(sqrt(n)) columns and just enough rows to hold all
    # digits (ceiling division, so an exact multiple adds no empty extra row)
    num_cols = int(np.sqrt(num_digits))
    num_rows = -(-num_digits//num_cols)

    # squeeze=False always returns a 2D array of axes, even for a 1x1 grid
    fig,ax = plt.subplots(nrows=num_rows,ncols=num_cols,sharex=True,sharey=True,
                          figsize=(num_cols,num_rows), squeeze=False)

    # plot all the numbers; grid cells beyond the last digit are left blank
    for i,cax in enumerate(ax.flatten()):
        if i < num_digits:
            cax.matshow(x[i].reshape(28,28), cmap='binary')
            if show_prediction:
                cax.text(0.99,0.99,f'{y[i]}',horizontalalignment='right',verticalalignment='top',
                         transform=cax.transAxes, fontsize=8, color='r')
        cax.axis('off')

In [None]:
# show every misclassified test image, annotated with the (wrong) label the network predicted
plot_digit_array(x_test[mistakes_test],predictions_test[mistakes_test],show_prediction=True)

### Investigate how well our network generalizes

Let's first find the examples for which the network predicted the correct label.

In [None]:
# boolean masks marking the images whose predicted label equals the true label
is_correct_train = predictions_train == y_train
is_correct_test = predictions_test == y_test

# integer indices of the correctly classified images
correct_train = np.where(is_correct_train)[0]
correct_test = np.where(is_correct_test)[0]

In [None]:
from matplotlib import gridspec

# pick one correctly classified test image at random
idx = np.random.choice(correct_test)

# two stacked panels: a short bar chart of the class probabilities on top,
# the digit itself (5x taller) underneath
fig = plt.figure(figsize=(3,1.2*3),constrained_layout=True)
gs = gridspec.GridSpec(2, 1, height_ratios=[1, 5], figure=fig)
ax = [plt.subplot(gs[panel]) for panel in range(2)]
prob_ax, digit_ax = ax

# predicted probability of each class
prob_ax.bar(range(num_classes), predictions_prob_test[idx], color='r')
prob_ax.set_xticks(range(num_classes))
prob_ax.set_yticks([])
prob_ax.set_xlim(-0.5,9.5)

# the image, with the predicted label in the top-right corner
digit_ax.matshow(x_test[idx,:,:,0], cmap='binary')
digit_ax.text(0.99, 0.99, f'{predictions_test[idx]}', color='r',
              horizontalalignment='right', verticalalignment='top',
              transform=digit_ax.transAxes)
digit_ax.set_xticks([])
digit_ax.set_yticks([]);

### Let's see what happens if we translate the image

In [None]:
# roll the chosen image by `shift` pixels along both image axes
# (np.roll wraps pixels around the edges)
shift = -3
translated_image = np.roll(x_test[idx,:,:,0], shift, axis=[0, 1])

# make a prediction: add the batch and channel axes the network expects
predict = model(translated_image[np.newaxis, ..., np.newaxis])
predicted_label = np.argmax(predict, axis=1)[0]

# two stacked panels: class probabilities on top, the shifted digit underneath
fig = plt.figure(figsize=(3,1.2*3),constrained_layout=True)
gs = gridspec.GridSpec(2, 1, height_ratios=[1, 5], figure=fig)
ax = [plt.subplot(gs[panel]) for panel in range(2)]
prob_ax, digit_ax = ax

prob_ax.bar(range(num_classes), predict[0,:], color='r')
prob_ax.set_xticks(range(num_classes))
prob_ax.set_yticks([])
prob_ax.set_xlim(-0.5,9.5)

digit_ax.matshow(translated_image, cmap='binary')
digit_ax.text(0.99, 0.99, f'{predicted_label}', color='r',
              horizontalalignment='right', verticalalignment='top',
              transform=digit_ax.transAxes)
digit_ax.set_xticks([])
digit_ax.set_yticks([]);

### Or rotate it

In [None]:
from scipy import ndimage

# rotate the chosen image by `angle` degrees, keeping the original image size
angle = 35
rotated_digit = ndimage.rotate(x_test[idx,:,:,0], angle, reshape=False)

# take absolute values (presumably to remove negative values left by the
# interpolation -- TODO confirm), then rescale back onto the [0,1] range
rotated_digit = np.abs(rotated_digit).astype(float)
rotated_digit = rotated_digit - np.min(rotated_digit)
rotated_digit = rotated_digit / np.max(rotated_digit)

# make a prediction: add the batch and channel axes the network expects
predict = model(rotated_digit[np.newaxis, ..., np.newaxis])
predicted_label = np.argmax(predict, axis=1)[0]

# two stacked panels: class probabilities on top, the rotated digit underneath
fig = plt.figure(figsize=(3,1.2*3),constrained_layout=True)
gs = gridspec.GridSpec(2, 1, height_ratios=[1, 5], figure=fig)
ax = [plt.subplot(gs[panel]) for panel in range(2)]
prob_ax, digit_ax = ax

prob_ax.bar(range(num_classes), predict[0,:], color='r')
prob_ax.set_xticks(range(num_classes))
prob_ax.set_yticks([])
prob_ax.set_xlim(-0.5,9.5)

digit_ax.matshow(rotated_digit, cmap='binary')
digit_ax.text(0.99, 0.99, f'{predicted_label}', color='r',
              horizontalalignment='right', verticalalignment='top',
              transform=digit_ax.transAxes)
digit_ax.set_xticks([])
digit_ax.set_yticks([]);
