# Polar model / Trained on rotated MNIST / Tested on rotated MNIST

In [1]:
from tensorflow.keras import datasets, layers, models

import cv2 as cv
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import os
import matplotlib.pyplot as plt
import time

In [2]:
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
# Iterating over the device list makes this cell a no-op on CPU-only machines
# (indexing [0] would raise IndexError there) and applies the same setting to
# every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

In [3]:
# Function to get vertical cylinder effect - mentioned in paper
def padImage(image, pixels=5):
    """Cyclically pad an image along its first axis (rows).

    The last `pixels` rows are copied above the first row and the first
    `pixels` rows are copied below the last row, so the top and bottom edges
    wrap around like the surface of a cylinder.

    Parameters
    ----------
    image : array_like
        Input array; padding is applied along axis 0 only.
    pixels : int, default 5
        Number of rows to wrap onto each side. If it exceeds the number of
        rows, the whole image is used for each side (slices clamp).

    Returns
    -------
    numpy.ndarray
        New array with `2 * min(pixels, rows)` extra rows and the same dtype.

    Raises
    ------
    ValueError
        If `pixels` is negative.
    """
    image = np.asarray(image)
    if pixels < 0:
        raise ValueError("pixels must be non-negative, got %r" % (pixels,))
    if pixels == 0:
        # image[-0:] is the *whole* image, so slicing would duplicate it
        # instead of adding nothing; handle the zero-width pad explicitly.
        return image.copy()

    bottom = image[-pixels:]
    top = image[:pixels]
    return np.concatenate((bottom, image, top), axis=0)

In [4]:
# Load the MNIST train/test split shipped with Keras
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()

# Normalize pixel values to be between 0 and 1.
# Both splits are cast to float32 *before* dividing so that they go through the
# exact same arithmetic and no float64 copy of the training set is created.
train_images = train_images.astype(np.float32) / 255.0
test_images = test_images.astype(np.float32) / 255.0

# Ensure a (num_images, 28, 28) layout
train_images = np.reshape(train_images, (-1, 28, 28))
test_images = np.reshape(test_images, (-1, 28, 28))

## Prepare Dataset

In [5]:
### THE PAD HAS TO BE DONE IN THE POLAR SPACE:
### padImage is applied to the output of the polar warp, so the wrapped rows
### are rows of the polar image, not of the original digit.

# 20 is the ceiling of (14 * sqrt(2)), to get length of diagonal of image, ensures we get all pixels in the image
# - mentioned in paper
POLAR_MAX_RADIUS = 20
PAD_PIXELS = 5
ROTATION_SEED = 0  # fixed so that Restart & Run All reproduces the same rotations
rotation_rng = np.random.default_rng(ROTATION_SEED)


def rotate_randomly(images, rng, max_angle=np.pi / 2.):
    """Rotate every image by its own angle drawn uniformly from [-max_angle, max_angle).

    `images` is a (num_images, rows, cols) array. A channel axis is added so
    that tfa.image.rotate treats the array as an NHWC batch and accepts one
    angle per image; this is a single batched call instead of one call per image.
    Returns a NumPy array with the same shape as `images`.
    """
    angles = rng.uniform(-max_angle, max_angle, size=len(images)).astype(np.float32)
    rotated = tfa.image.rotate(images[..., None], angles)
    return rotated.numpy()[..., 0]


def to_padded_polar(images, max_radius=POLAR_MAX_RADIUS, pixels=PAD_PIXELS):
    """Warp each image to polar coordinates, wrap-pad its rows and add a channel axis.

    Returns an array of shape (num_images, rows + 2 * pixels, cols, 1).
    """
    polar = []
    for img in images:
        rows, cols = img.shape
        # OpenCV expects the center as (x, y) == (col, row)
        center = (cols / 2, rows / 2)
        warped = cv.linearPolar(img, center, max_radius, cv.WARP_FILL_OUTLIERS)
        polar.append(padImage(warped, pixels=pixels))
    return np.array(polar)[..., None]


# Rotate train set
X_train_polar = to_padded_polar(rotate_randomly(train_images, rotation_rng))

# Unrotated test set
X_test_polar = to_padded_polar(test_images)

# Rotate test set
X_test_r_polar = to_padded_polar(rotate_randomly(test_images, rotation_rng))

In [6]:
# Sanity check: padImage adds 5 rows on each side of the 28-row polar image and
# a trailing channel axis was appended, so (num_images, 38, 28, 1) is expected.
X_test_r_polar.shape

(10000, 38, 28, 1)

## Model architecture

In [7]:
# Small CNN: three 3x3 conv stages, the first two followed by pooling,
# layer normalisation over channels and dropout, then a global max-pool
# and a two-layer dense head that emits 10 raw logits.
model = models.Sequential([
    layers.Input(shape=X_train_polar.shape[1:]),

    # Stage 1
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D(),
    layers.LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=True),
    layers.Dropout(rate=0.5),

    # Stage 2
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D(),
    layers.LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=True),
    layers.Dropout(rate=0.5),

    # Stage 3
    layers.Conv2D(256, (3, 3), activation='relu', padding='same'),

    # Collapse the spatial dimensions to one value per channel
    layers.GlobalMaxPooling2D(),

    # Classifier head; the last layer has no activation (logits)
    layers.Dense(64, activation='linear'),
    layers.Activation('relu'),
    layers.Dense(10),
])

In [8]:
# Print the layer-by-layer output shapes and parameter counts of the network
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 38, 28, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 19, 14, 32)        0         
_________________________________________________________________
layer_normalization (LayerNo (None, 19, 14, 32)        64        
_________________________________________________________________
dropout (Dropout)            (None, 19, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 19, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 9, 7, 64)          0         
_________________________________________________________________
layer_normalization_1 (Layer (None, 9, 7, 64)          1

## Model Training

In [9]:
name = 'polar_MNIST-r'
# Write the model to '<name>.h5' only when validation accuracy reaches a new maximum
checkpoint = tf.keras.callbacks.ModelCheckpoint(name + '.h5', verbose=1, save_best_only=True, monitor='val_accuracy', mode='max')

opt = tf.keras.optimizers.Adam(learning_rate=0.0001)

# The network's last Dense layer has no activation, hence from_logits=True
model.compile(optimizer=opt,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# NOTE(review): validation_data is the rotated *test* set, and the checkpoint is
# selected on val_accuracy, so the test accuracy reported for the restored
# checkpoint is measured on the same data used for model selection.
# Consider holding out part of the training set for validation instead.
#
# The History object is kept so the training curves can be inspected later
# instead of leaving only its repr as the cell output.
history = model.fit(X_train_polar, train_labels, batch_size=32, epochs=50,
                    validation_data=(X_test_r_polar, test_labels),
                    callbacks=[checkpoint])

Epoch 1/50

Epoch 00001: val_accuracy improved from -inf to 0.84170, saving model to polar_MNIST-r.h5
Epoch 2/50

Epoch 00002: val_accuracy improved from 0.84170 to 0.91400, saving model to polar_MNIST-r.h5
Epoch 3/50

Epoch 00003: val_accuracy improved from 0.91400 to 0.92770, saving model to polar_MNIST-r.h5
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.92770 to 0.94570, saving model to polar_MNIST-r.h5
Epoch 5/50

Epoch 00005: val_accuracy improved from 0.94570 to 0.95040, saving model to polar_MNIST-r.h5
Epoch 6/50

Epoch 00006: val_accuracy improved from 0.95040 to 0.96220, saving model to polar_MNIST-r.h5
Epoch 7/50

Epoch 00007: val_accuracy did not improve from 0.96220
Epoch 8/50

Epoch 00008: val_accuracy improved from 0.96220 to 0.96670, saving model to polar_MNIST-r.h5
Epoch 9/50

Epoch 00009: val_accuracy improved from 0.96670 to 0.96860, saving model to polar_MNIST-r.h5
Epoch 10/50

Epoch 00010: val_accuracy improved from 0.96860 to 0.96990, saving model to polar_M

<tensorflow.python.keras.callbacks.History at 0x27569223700>

## Model Accuracies

In [10]:
# Restore the checkpoint written during training (best validation accuracy)
m = tf.keras.models.load_model('polar_MNIST-r.h5')


def _percent_correct(net, images, labels):
    """Return the percentage of `images` whose arg-max prediction equals `labels`."""
    predicted = net.predict(images).argmax(axis=1)
    return np.mean(predicted == labels)*100


# Accuracy on the unrotated test images
accuracy = _percent_correct(m, X_test_polar, test_labels)
# Accuracy on the randomly rotated test images
accuracy_rotated = _percent_correct(m, X_test_r_polar, test_labels)

print(accuracy, accuracy_rotated)

98.22 98.11999999999999


## Prediction time for test set

In [11]:
# Untimed warm-up call so that one-off setup cost on the first predict()
# (presumably graph tracing / device initialisation) does not skew the mean.
model.predict(X_test_polar, verbose=0)

times = []

# Time 50 full passes over the test set.
# perf_counter() is monotonic and high-resolution, unlike time.time(), which
# follows the wall clock and can jump if the system clock is adjusted.
for _ in range(50):
    start = time.perf_counter()

    model.predict(X_test_polar, verbose=0)

    end = time.perf_counter()
    times.append(end - start)

# Mean wall time in seconds for predicting the whole test set
print(np.mean(times))



0.6377511358261109


## Prediction time per image

In [12]:
# Mean time (seconds) of one full-test-set prediction divided by the number of test images
np.mean(times) / len(X_test_polar)

6.377511358261108e-05