In [None]:
%matplotlib notebook

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import tensorflow as tf
import tensorflow.keras as keras

# Image classification

We want to train a neural network on the **MNIST** database and compare its results with those obtained using the kNN method.

We will do the following steps in order:

1. Load the MNIST training and test datasets using tensorflow_datasets
2. Define the Neural Network
3. Define a loss function and an optimizer
4. Prepare and optimize the training dataset
5. Train the network on the training data
6. Test the network on the test data

In [None]:
import matplotlib.pyplot as plt

# Display one image
def show_image(ax, image, title=None):
    """Draw one image on the given axes with all ticks and tick labels hidden.

    Parameters
    ----------
    ax : object providing ``imshow``, ``tick_params`` and ``set_title``
        (a matplotlib Axes in this notebook).
    image : image data, forwarded unchanged to ``ax.imshow``.
    title : optional title; no title is set when it is ``None``.
    """
    # Every tick mark and tick label is switched off: pixel coordinates
    # carry no information for the reader.
    hidden = {
        name: False
        for name in ('bottom', 'top', 'labelbottom', 'right', 'left', 'labelleft')
    }

    ax.imshow(image, cmap='Greys')
    ax.tick_params(axis='both', which='both', **hidden)

    if title is None:
        return
    ax.set_title(title)

# Display some images from the given dataset
def show_mnist(dataset, nrow, ncol):
    """Display the first ``nrow * ncol`` samples of a dataset on a grid.

    Parameters
    ----------
    dataset : dataset of ``(image, label)`` pairs providing ``take`` and
        ``enumerate`` (a ``tf.data.Dataset`` in this notebook).
    nrow : number of rows of the grid.
    ncol : number of columns of the grid.

    Notes
    -----
    Samples beyond the capacity of the grid are ignored. Without this cap,
    a dataset holding more than ``nrow * ncol`` elements makes
    ``plt.subplot`` raise a ``ValueError`` on the first out-of-range index.
    """
    plt.figure(figsize=(1.2*ncol, 1.4*nrow))

    nrow, ncol = int(nrow), int(ncol)
    # Never request more subplots than the grid can hold.
    for i, (image, label) in dataset.take(nrow * ncol).enumerate():
        # Subplot indices are 1-based while enumerate() starts at 0.
        ax = plt.subplot(nrow, ncol, int(i) + 1)
        show_image(ax, image, title=f"This is a {label}")

## Full dataset
We first apply these steps by optimizing the model on the full dataset at each iteration:

In [None]:
# 1. Load the MNIST training and test datasets

import tensorflow_datasets as tfds

# Load both MNIST splits together with the dataset metadata.
# - as_supervised=True : each element is an (image, label) tuple
# - with_info=True     : also return the metadata object (ds_info)
(train_dataset, test_dataset), ds_info = tfds.load(
    name='mnist',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
    shuffle_files=True,
)

# Text summary of the dataset metadata
print(ds_info)

In [None]:
# Number of examples in the training and test splits, as reported by the metadata
ntrain, ntest = (
    ds_info.splits[split_name].num_examples
    for split_name in ('train', 'test')
)

In [None]:
# Preview: randomly drawn training digits laid out on a 3 x 8 grid
show_mnist(
    train_dataset.shuffle(ntrain).take(3*8),
    nrow=3,
    ncol=8,
)

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Rescaling, Flatten

In [None]:
# 2. Define the model

# Image size (28x28 pixels for MNIST) and number of classes, read from the metadata.
img_height, img_width = ds_info.features['image'].shape[:2]
n_out = ds_info.features['label'].num_classes

# Feed-forward neural network, given as an ordered list of layers
model = Sequential([
    # Rescale the pixel values: [0, 255] -> [0, 1]
    Rescaling(1./255, input_shape=(img_height, img_width, 1)),
    # Flatten each single-channel image into a vector of img_height*img_width values
    Flatten(),
    # Hidden layer: img_height*img_width units (784 for 28x28 images), ReLU activation
    Dense(img_height*img_width, activation='relu'),
    # Output layer: one raw score per class (n_out units), no non-linearity
    Dense(n_out, activation='linear'),
])

model.summary()

In [None]:
from tensorflow.keras.optimizers import Adam
from keras.losses import SparseCategoricalCrossentropy
from keras.metrics import SparseCategoricalAccuracy

In [None]:
# 3. Define a loss function and an optimizer

model.compile(
    # Adam optimizer with a learning rate of 1e-3
    optimizer=Adam(learning_rate=1e-3),
    # Labels are integer class ids; from_logits=True because the output
    # layer of the model uses a linear activation (raw scores)
    loss=SparseCategoricalCrossentropy(from_logits=True),
    # Accuracy is reported next to the loss
    metrics=[SparseCategoricalAccuracy()],
)


In [None]:
# 4. Prepare train dataset

batch_size = 50

# Input pipeline, in order: cache the examples, reshuffle them at every
# epoch, group them into mini-batches, and prefetch upcoming batches.
dataset = (
    train_dataset
    .cache()
    .shuffle(ntrain, reshuffle_each_iteration=True)
    .batch(batch_size, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# 5. Train the neural network

epochs = 10  # number of passes over the training dataset

# Keep the object returned by fit() instead of discarding it: its `history`
# attribute holds the per-epoch loss and metric values, which can then be
# inspected or plotted. Assigning it also avoids a bare object repr being
# shown as the cell output.
history = model.fit(dataset, epochs=epochs, verbose=1)

In [None]:
# 6. Evaluate the model on test dataset

# Batch the test split with the same batch size as the training pipeline and
# let tf.data choose the prefetch buffer size (tf.data.AUTOTUNE), as is done
# for the training pipeline, instead of a hard-coded buffer of 128 batches.
testset = test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Loss and metric values on the test dataset (shown as the cell output)
model.evaluate(testset)

# Red & blue dots classification

Testing the circle configuration using [TensorFlow playground](https://playground.tensorflow.org/):
- perceptron
- consecutive linear layers (with linear activation)
- 2 hidden layers of 4 and 2 neurons
- ReLU vs tanh
- perceptron with augmented input dimension ($x^2$ and $y^2$) $\Rightarrow$ **linearly separable**

# Approximating a function

Given a function $f: \mathbb{R}^N \to \mathbb{R}^M$, it is possible to approximate it by a neural network with $N$ inputs and $M$ outputs, trained on a sample of pairs $(x, f(x))$.

What kind of functions can we approximate?

For a **1-layer** neural network (input directly connected to the output through a linear layer), we can only approximate a **linear function** well.

But with a **2-layer** neural network with a "sigmoid-like" hidden activation layer, we can approximate arbitrarily well **any continuous function** $f: \mathbb{R}^N \to \mathbb{R}^M$ on a compact subset of $\mathbb{R}^N$, provided there are sufficiently many units in the hidden layer (see https://www.sciencedirect.com/science/article/abs/pii/0893608089900208 and https://link.springer.com/article/10.1007/BF02551274).

Take a look at http://cs231n.github.io/neural-networks-1/#power and https://calcul.math.cnrs.fr/attachments/evt_sci/2019-05-mini-symposium-smai/smai_2019_gribonval.pdf to learn more about the **approximation space** of neural networks.

# $\mathbb{R}$ to $\mathbb{R}$ functions

We want to fit a set of points $(x_k, y_k)$ by a model $m_{\theta}$, where $\theta$ are parameters to estimate. The function we want to minimize is the following:
$$
J(\mathbf{\theta}) = \frac{1}{2 K} \sum_{k=1}^K \left(m_{\theta}(x_k) - y_k\right)^2 .
$$

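This function is the loss function. Note that a **2-layer** neural network model $m$ with $n$ neurons in the hidden layer and the activation function $\varphi$ can be written as:

$$
m(x) = \sum_{i=1}^n c_i \varphi(a_i x + b_i) + d ,
$$

where $a_i$ and $b_i$ are the weights and biases of the hidden layer, $c_i$ are the weights of the output layer and $d$ is its bias.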

In [None]:
# Write a function f that you want to approximate


In [None]:
# Create a numpy array x containing 50 points in your domain ([-1, 1] for example)

# Apply your function f to get an array y. Add some noise to the data.
# (add a random number drawn from a gaussian distribution to each element of y)


In [None]:
# Plot of the function f and the points (x_k, y_k)


In [None]:
# Create a training dataset from the numpy arrays x and y. 
# x are the training data
# y are the labels


# cache, shuffle, batch and prefetch the dataset


In [None]:
# Create a Sequential neural network with some Dense layers (2 or 3).
# This neural network takes input of size 1 (one real number x_k) and returns a scalar value (one real number y_k)
# Use the activation function of your choice


In [None]:
# Define an optimizer and a loss function for your model.
# The loss function is given at the beginning of this part and already exists in Tensorflow (not a custom loss)


In [None]:
# Train your model (size of batches and number of epochs are up to you)


In [None]:
# Create some test data x_test and y_test

# Evaluate your model with these test data


In [None]:
# Plot the results :
# Create an array of evenly spaced (x_i) for plotting
# Apply your model with these points x_i and plot the curve of your model (x_i, y_i)
# Plot the 50 points (x_k, y_k)
