# 1. Import libraries

In [1]:
import os
import sys
import numpy as np
import gzip
import pandas as pd
from time import time
print("OS: ", sys.platform)
print("Python: ", sys.version)
# MXnet
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
from mxnet.gluon import nn
print("MXNet version", mx.__version__) # Matteo 1.5.1
# Tensorflow
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
print("Tensorflow version (by Google): ", tf.__version__)


OS:  darwin
Python:  3.7.6 (v3.7.6:43364a7ae0, Dec 18 2019, 14:18:50) 
[Clang 6.0 (clang-600.0.57)]
MXNet version 1.5.1
Tensorflow version (by Google):  2.1.0


# Set GPU usage

In [2]:
# MXNET
gpus = mx.test_utils.list_gpus()
ctx =  [mx.gpu()] if gpus else [mx.cpu(0), mx.cpu(1)]
print(ctx)

[cpu(0), cpu(1)]


In [3]:
# TENSORFLOW

# Control reproducibility

The most common form of randomness used in neural networks is the random initialization of the network weights. Although randomness can be used in other areas, here is just a short list:

- Randomness in Initialization, such as weights.
- Randomness in Regularization, such as dropout.
- Randomness in Layers, such as word embedding.
- Randomness in Optimization, such as stochastic optimization.

source: https://machinelearningmastery.com/reproducible-results-neural-networks-keras/

In [4]:
import random
np.random.seed(42)
random.seed(42)
for computing_unit in ctx:
    mx.random.seed(42, ctx = computing_unit)
tf.random.set_seed(42)

# Assignment
1. explore how is the quality of the two cifar10 dataset
1. are they similar enought?
1. create the train, validation, test split
1. look for a tutorial online that is finetuning a network
1. create the pre-trained networks and attach the last layer
1. train the two networks
1. test the performance on the test set

# 1. Explore the quality of the datasets

In [5]:
train_mx = mx.gluon.data.vision.datasets.CIFAR10(train = True)
test_mx = mx.gluon.data.vision.datasets.CIFAR10(train = False)

dataset_tf = tf.keras.datasets.cifar10.load_data()
train_tf = dataset_tf[0]
test_tf = dataset_tf[1]

In [6]:
#help(dataset_mx)
# dataset_tf[0] <-- 50k train sample
# dataset_tf[1] <-- 10k test
# train_tf = dataset_tf[0]
# train_tf[0] <-- 50k train images (actual pixels)
# train_tf[1] <-- 50k calss label vecror

# train_mx<-- 50k 
# train_mx[0] <-- image at position 0
# train_mx[45][0] <-- image 45 actual pixels
# train_mx[45][1] <-- class label of image 45

# 2. Check for similarity

In [7]:
# check if the first image is the same
first_image_mx = train_mx[0][0]
first_label_mx = train_mx[0][1]
first_image_tf = train_tf[0][0]
first_label_tf = train_tf[1][0]
print("Same label: ", first_label_mx,  first_label_tf)
are_same = np.array_equal(np.array(first_image_tf), np.array(first_image_tf))
print("Same image: ", are_same)

Same label:  6 [6]
Same image:  True


# 3. Given for free by the train - test split

In [8]:
len(train_tf[0])

50000

# 4. Fine tuning tutorial
Tensorflow: 
- https://www.tensorflow.org/tutorials/images/transfer_learning
- https://pylearned.com/Finetuning/

MXNet: 
- https://gluon-cv.mxnet.io/build/examples_classification/transfer_learning_minc.html
- https://d2l.ai/chapter_computer-vision/kaggle-gluon-cifar10.html

In [23]:
# parameters
BATCH_SIZE = 300
EPOCHS = 5
CLASSES = 10
IMAGE_SIZE = 32

### Data preparation

In [24]:
# TENSORFLOW
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input

(x_train_tf, y_train_tf) = train_tf
(x_validation_tf, y_validation_tf) = test_tf[:5000]

# create data generator
train_generator_tf = ImageDataGenerator().flow(x_train_tf, y_train_tf, batch_size=BATCH_SIZE)
validation_generator_tf = ImageDataGenerator().flow(x_validation_tf, y_validation_tf, batch_size=BATCH_SIZE)

(x_test_tf, y_test_tf) = test_tf[-5000:]
y_train = tf.keras.utils.to_categorical(y_train_tf, CLASSES)
y_vali = tf.keras.utils.to_categorical(y_validation_tf, CLASSES)
y_test = tf.keras.utils.to_categorical(y_test_tf, CLASSES)

datagen = ImageDataGenerator()

In [25]:
# MXNET

from mxnet.gluon.data.vision import transforms

transformations = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor()
])
train_data = gluon.data.DataLoader(
    dataset = train_mx.transform_first(transformations),
    batch_size=BATCH_SIZE, shuffle=True)
test_data = gluon.data.DataLoader(
    dataset = test_mx.transform_first(transformations),
    batch_size=BATCH_SIZE, shuffle=True)

# 5. Create the pre-trained network

In [26]:
# MXNET
from gluoncv.model_zoo import get_model

model_name = "vgg16"
finetune_net_mx = get_model(model_name, pretrained=True)
with finetune_net_mx.name_scope():
    finetune_net_mx.output = nn.Dense(CLASSES)
finetune_net_mx.output.initialize(mx.init.Xavier(), ctx = ctx)
finetune_net_mx.collect_params().reset_ctx(ctx)
finetune_net_mx.hybridize()


In [47]:
# TENSORFLOW
from tensorflow.keras.layers import Input, Flatten, Dense

#Get back the convolutional part of a VGG network trained on ImageNet
model_vgg16_conv = tf.keras.applications.VGG16(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
model_vgg16_conv.trainable = False

init_tf = tf.keras.initializers.GlorotNormal(seed=1)
prediction_layer = tf.keras.layers.Dense(units = CLASSES, activation = 'softmax', kernel_initializer = init_tf, bias_initializer = init_tf)

core = tf.keras.Sequential([
    model_vgg16_conv,
    layers.Flatten(),
    layers.Dense(units=4096, activation='relu', kernel_initializer = init_tf, bias_initializer = init_tf),
    layers.Dense(units=4096, activation='relu', kernel_initializer = init_tf, bias_initializer = init_tf),
    prediction_layer
])

finetune_net_tf = tf.keras.Model(inputs = layers.Input(shape=(32,32, 3),name = 'image_input'), output=core)

TypeError: ('Keyword argument not understood:', 'inputs')

In [44]:
help(tf.keras.Model)

Help on class Model in module tensorflow.python.keras.engine.training:

class Model(tensorflow.python.keras.engine.network.Network)
 |  Model(*args, **kwargs)
 |  
 |  `Model` groups layers into an object with training and inference features.
 |  
 |  There are two ways to instantiate a `Model`:
 |  
 |  1 - With the "functional API", where you start from `Input`,
 |  you chain layer calls to specify the model's forward pass,
 |  and finally you create your model from inputs and outputs:
 |  
 |  ```python
 |  import tensorflow as tf
 |  
 |  inputs = tf.keras.Input(shape=(3,))
 |  x = tf.keras.layers.Dense(4, activation=tf.nn.relu)(inputs)
 |  outputs = tf.keras.layers.Dense(5, activation=tf.nn.softmax)(x)
 |  model = tf.keras.Model(inputs=inputs, outputs=outputs)
 |  ```
 |  
 |  2 - By subclassing the `Model` class: in that case, you should define your
 |  layers in `__init__` and you should implement the model's forward pass
 |  in `call`.
 |  
 |  ```python
 |  import tensorflow a

### Inspect they are they have the same strucuture

In [40]:
finetune_net_tf.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 1, 1, 512)         14714688  
_________________________________________________________________
flatten_3 (Flatten)          (None, 512)               0         
_________________________________________________________________
dense_17 (Dense)             (None, 4096)              2101248   
_________________________________________________________________
dense_18 (Dense)             (None, 4096)              16781312  
_________________________________________________________________
dense_16 (Dense)             (None, 10)                40970     
Total params: 33,638,218
Trainable params: 18,923,530
Non-trainable params: 14,714,688
_________________________________________________________________


# 6. Train

In [None]:
%%time
# TENSORFLOW
chosen_tf_optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
finetune_net_tf.compile(loss=keras.losses.categorical_crossentropy, optimizer=chosen_tf_optimizer, metrics=['accuracy'])
steps_per_epoch = x_train_tf.shape[0]//BATCH_SIZE
validation_steps = x_validation_tf.shape[0]//BATCH_SIZE
finetune_net_tf.fit(train_generator_tf, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, 
                    validation_data=validation_generator_tf, validation_steps=validation_steps, 
                    shuffle=True, callbacks=[])

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 166 steps, validate for 33 steps
Epoch 1/5

In [None]:
# MXNET
finetune_net_mx.initialize(mx.init.Xavier(), ctx=ctx, force_reinit=True)
#handwritten_net(init = mx.init.Xavier(), ctx=ctx)
optim = mx.optimizer.Adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, lazy_update=True)
trainer = gluon.Trainer(finetune_net_mx.collect_params(), optim)
# Use Accuracy as the evaluation metric.
metric = mx.metric.Accuracy()
L = gluon.loss.SoftmaxCrossEntropyLoss()

num_batch = len(train_data)

for epoch in range(EPOCHS):
    tic = time()
    train_loss = 0
    metric.reset()

    for i, batch in enumerate(train_data):
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        with autograd.record():
            outputs = [finetune_net_mx(X) for X in data]
            loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
        for l in loss:
            l.backward()

        trainer.step(BATCH_SIZE)
        train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)

        metric.update(label, outputs)

    _, train_acc = metric.get()
    train_loss /= num_batch

    _, val_acc = test(finetune_net_mx, val_data, ctx)

    print('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f | time: %.1f' %
             (epoch, train_acc, train_loss, val_acc, time.time() - tic))

_, test_acc = test(finetune_net_mx, test_data, ctx)
print('[Finished] Test-acc: %.3f' % (test_acc))

# 7. Evaluate on test

In [None]:
# TENSORFLOW

In [None]:
# MXNET
def test(net, val_data, ctx):
    metric = mx.metric.Accuracy()
    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        outputs = [net(X) for X in data]
        metric.update(label, outputs)

    return metric.get()

# 2. Read dataset - General Train/Test split

In [None]:
def read_mnist(images_path: str, labels_path: str):
    #mnist_path = "data/mnist/"
    #images_path = mnist_path + images_path
    print(images_path)
    with gzip.open(labels_path, 'rb') as labelsFile:
        labels = np.frombuffer(labelsFile.read(), dtype=np.uint8, offset=8)

    with gzip.open(images_path,'rb') as imagesFile:
        length = len(labels)
        # Load flat 28x28 px images (784 px), and convert them to 28x28 px
        features = np.frombuffer(imagesFile.read(), dtype=np.uint8, offset=16) \
                        .reshape(length, 784) \
                        .reshape(length, 28, 28, 1)
    return features, labels

In [None]:
# LOAD TRAIN AND TEST ALREADY SPLIT
train = {}
test = {}
train['features'], train['labels'] = read_mnist('train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz')
test['features'], test['labels'] = read_mnist('t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz')
print(test['features'].shape[0], '-> # of test images.')
print(train['features'].shape[0], '-> # of training images (train + validation).')
# CREATE TRAIN AND VALIDATION SPLIT
validation = {}
train['features'], validation['features'], train['labels'], validation['labels'] = train_test_split(train['features'], train['labels'], test_size=0.2, random_state=0)
print("    ", train['features'].shape[0], '-> # of (actual) training images.')
print("    ", validation['features'].shape[0], '-> # of validation images.')

# 3. Create a reader for each Framework

In [None]:
# GENERAL PARAMETERS
EPOCHS = 3
BATCH_SIZE = 200

In [None]:
# MXNET
# convert from NHWC to NCHW that is used by MXNET
# https://stackoverflow.com/questions/37689423/convert-between-nhwc-and-nchw-in-tensorflow
X_train_mx = mx.ndarray.transpose(mx.nd.array(train['features']), axes=(0, 3, 1, 2))
y_train_mx = mx.nd.array(train['labels'])
X_validation_mx = mx.ndarray.transpose(mx.nd.array(validation['features']), axes=(0, 3, 1, 2))
y_validation_mx = mx.nd.array(validation['labels'])
X_test_mx = mx.ndarray.transpose(mx.nd.array(test['features']), axes=(0, 3, 1, 2))
y_test_mx = mx.nd.array(test['labels'])
# create data iterator
train_data_mx = mx.io.NDArrayIter(X_train_mx.asnumpy(), y_train_mx.asnumpy(), BATCH_SIZE, shuffle=True)
val_data_mx = mx.io.NDArrayIter(X_validation_mx.asnumpy(), y_validation_mx.asnumpy(), BATCH_SIZE)
test_data_mx = mx.io.NDArrayIter(X_test_mx.asnumpy(), y_test_mx.asnumpy(), BATCH_SIZE)

In [None]:
X_train_mx.shape

In [None]:
type(X_train_mx.asnumpy())

In [None]:
# TENSORFLOW
# convert in multiple output for tensorflow
X_train_tf, y_train_tf = train['features'], to_categorical(train['labels'])
X_validation_tf, y_validation_tf = validation['features'], to_categorical(validation['labels'])
# create data generator
train_generator_tf = ImageDataGenerator().flow(X_train_tf, y_train_tf, batch_size=BATCH_SIZE)
validation_generator_tf = ImageDataGenerator().flow(X_validation_tf, y_validation_tf, batch_size=BATCH_SIZE)

In [None]:
X_train_tf.shape

# 4. Create models

In [None]:
# MXNET -> GLUON
# IDENTICAL TO LeNet paper: http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf 
model_mx = nn.HybridSequential()
model_mx.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
        nn.AvgPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
        nn.AvgPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(10))

In [None]:
# TENSORFLOW -> KERAS
model_tf = keras.Sequential()
init_tf = tf.keras.initializers.GlorotNormal(seed=1)
model_tf.add(layers.Conv2D(filters=6, kernel_size=(5, 5), activation='relu', input_shape=(28,28,1), kernel_initializer = init_tf, bias_initializer = init_tf))
model_tf.add(layers.AveragePooling2D(pool_size=(2, 2), strides=2))
model_tf.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu', kernel_initializer = init_tf, bias_initializer = init_tf))
model_tf.add(layers.AveragePooling2D(pool_size=(2, 2), strides=2))
model_tf.add(layers.Flatten())
model_tf.add(layers.Dense(units=120, activation='relu', kernel_initializer = init_tf, bias_initializer = init_tf))
model_tf.add(layers.Dense(units=84, activation='relu', kernel_initializer = init_tf, bias_initializer = init_tf))
model_tf.add(layers.Dense(units=10, activation = 'softmax', kernel_initializer = init_tf, bias_initializer = init_tf))
#model.summary()

In [None]:
#help(layers.Dense)

# Optimization on/off

In [None]:
# MXNET
model_mx.hybridize()

In [None]:
# TENSORFLOW


# 5. Train Models

In [None]:
%%time
# MXNET
def training_procedure(handwritten_net, train_data):
    global EPOCHS
    global ctx
    handwritten_net.initialize(mx.init.Xavier(), ctx=ctx, force_reinit=True)
    #handwritten_net(init = mx.init.Xavier(), ctx=ctx)
    optim = mx.optimizer.Adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, lazy_update=True)
    trainer = gluon.Trainer(handwritten_net.collect_params(), optim)
    # Use Accuracy as the evaluation metric.
    metric = mx.metric.Accuracy()
    softmax_cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
    
    for i in range(EPOCHS):
        # Reset the train data iterator.
        train_data.reset()
        # Loop over the train data iterator.
        for batch in train_data:
            # Splits train data into multiple slices along batch_axis
            # and copy each slice into a context.
            data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            # Splits train labels into multiple slices along batch_axis
            # and copy each slice into a context.
            label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
            outputs = []
            # Inside training scope
            with autograd.record():
                for x, y in zip(data, label):
                    z = handwritten_net(x)
                    # Computes softmax cross entropy loss.
                    loss = softmax_cross_entropy_loss(z, y)
                    # Backpropogate the error for one iteration.
                    loss.backward()
                    outputs.append(z)
            # Updates internal evaluation
            metric.update(label, outputs)
            # Make one step of parameter update. Trainer needs to know the
            # batch size of data to normalize the gradient by 1/batch_size.
            trainer.step(batch.data[0].shape[0])
        # Gets the evaluation result.
        name, acc = metric.get()
        # Reset evaluation result to initial state.
        metric.reset()
        print('training acc at epoch %d: %s=%f'%(i, name, acc))
    return handwritten_net

trained_model_mx = training_procedure(model_mx, train_data_mx)

In [None]:
%%time
# TENSORFLOW
chosen_tf_optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model_tf.compile(loss=keras.losses.categorical_crossentropy, optimizer=chosen_tf_optimizer, metrics=['accuracy'])
steps_per_epoch = X_train_tf.shape[0]//BATCH_SIZE
validation_steps = X_validation_tf.shape[0]//BATCH_SIZE
model_tf.fit_generator(train_generator_tf, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, 
                    validation_data=validation_generator_tf, validation_steps=validation_steps, 
                    shuffle=True, callbacks=[])

# 6. Evaluate models

In [None]:
%%time
# MXNET
# TEST THE NETWORK
metric = mx.metric.Accuracy()
# Reset the test data iterator.
test_data_mx.reset()
# Loop over the test data iterator.
for batch in test_data_mx:
    # Splits test data into multiple slices along batch_axis
    # and copy each slice into a context.
    data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
    # Splits validation label into multiple slices along batch_axis
    # and copy each slice into a context.
    label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
    outputs = []
    for x in data:
        outputs.append(model_mx(x))
    # Updates internal evaluation
    metric.update(label, outputs)
print('MXnet - Test %s : %f'%metric.get())
assert metric.get()[1] > 0.90

In [None]:
%%time
# TENSORFLOW
score = model_tf.evaluate(test['features'], to_categorical(test['labels']), verbose=0)
#print('Test loss:', score[0])
print('TensorFlow - Test accuracy:', score[1])
assert score[1] > 0.90