# Four tasks

* ### Implement forward and backward for FCLayer (`layers/fc_layer.py`)
* ### Implement forward and backward for ReLULayer (`layers/relu_layer.py`)
* ### Implement forward and backward for SigmoidLayer (`layers/sigmoid_layer.py`)
* ### Implement SGD with momentum (`optimizer.py`)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
from network import Network

from builtin import BuiltInFCLayer, BuiltInReLULayer, BuiltInSigmoidLayer, BuiltInSGDwithMomentum

from layers import FCLayer, ReLULayer, SigmoidLayer

from criterion import SoftmaxCrossEntropy
from optimizer import SGD, SGDwithMomentum

from solver import train, test
from plot import plot_loss_and_acc

In [2]:
%matplotlib inline

### Set Hyperparameters

In [3]:
# Mini-batch size and number of training epochs (both passed to train()).
batch_size, max_epoch = 100, 20
# Third argument handed to every FCLayer constructor in this notebook.
init_std = 1e-2

# Optimizer settings: SGD takes the first two, the momentum optimizer all three.
learning_rate, weight_decay, momentum = 1e-1, 5e-5, 0.9

# Passed to train()/test(); the captured logs print a line every 50 batches.
disp_freq = 50

### Dataset

In [4]:
# Load MNIST through the TensorFlow tutorial helper, reading the archives under
# "MNIST_data/". Labels are requested one-hot and 5000 samples are requested
# for the validation split.
mnist = input_data.read_data_sets("MNIST_data/", validation_size=5000, one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Criterion and Optimizer

In [5]:
# Loss object shared by the training and test cells.
criterion = SoftmaxCrossEntropy()
# Plain SGD configured from the learning rate and weight decay hyperparameters.
sgd = SGD(learning_rate, weight_decay)
# NOTE(review): this instantiates BuiltInSGDwithMomentum (from `builtin`) rather
# than the SGDwithMomentum imported from `optimizer` — confirm this is intended,
# since the task list asks for a hand-written SGD-with-momentum implementation.
sgd_momentum = BuiltInSGDwithMomentum(learning_rate, weight_decay, momentum)

# MLP with sigmoid activation function 

Build and train an MLP with one hidden layer of 256 units, using the sigmoid activation function and cross-entropy loss.

## Before executing the following code, you should implement FCLayer and SigmoidLayer.

### Build the model (see Framework_demo.ipynb)

In [6]:
# Sigmoid MLP: FC(784, 256) -> Sigmoid -> FC(256, 10).
SigmoidMLP = Network()
for layer in (
    FCLayer(784, 256, init_std),
    SigmoidLayer(),
    FCLayer(256, 10, init_std),
):
    SigmoidMLP.add(layer)

# Loss and optimizer objects for this model.
# NOTE(review): the training cell in this notebook passes `sgd`, not
# `optimizer`, to train() — confirm which one is meant to be used.
criterion = SoftmaxCrossEntropy()
optimizer = SGD(learning_rate, weight_decay)

### Train

In [None]:
# Train the sigmoid MLP with plain SGD. train() returns three values, unpacked
# here as the model plus what are presumably its loss and accuracy histories
# (they are fed to plot_loss_and_acc later) — TODO confirm against solver.py.
SigmoidMLP, sigmoid_loss, sigmoid_acc = train(SigmoidMLP, criterion, sgd, mnist, max_epoch, batch_size, disp_freq)

Epoch [0][20]	 Batch [0][550]	 Training Loss 2.2998	 Accuracy 0.0400
Epoch [0][20]	 Batch [50][550]	 Training Loss 2.3024	 Accuracy 0.1131
Epoch [0][20]	 Batch [100][550]	 Training Loss 2.3025	 Accuracy 0.1090
Epoch [0][20]	 Batch [150][550]	 Training Loss 2.3024	 Accuracy 0.1107
Epoch [0][20]	 Batch [200][550]	 Training Loss 2.3021	 Accuracy 0.1120
Epoch [0][20]	 Batch [250][550]	 Training Loss 2.3021	 Accuracy 0.1116
Epoch [0][20]	 Batch [300][550]	 Training Loss 2.3019	 Accuracy 0.1125
Epoch [0][20]	 Batch [350][550]	 Training Loss 2.3017	 Accuracy 0.1121
Epoch [0][20]	 Batch [400][550]	 Training Loss 2.3017	 Accuracy 0.1116
Epoch [0][20]	 Batch [450][550]	 Training Loss 2.3016	 Accuracy 0.1123
Epoch [0][20]	 Batch [500][550]	 Training Loss 2.3016	 Accuracy 0.1126
Epoch [0]	 Average training loss 2.3016	 Average training accuracy 0.1121
Epoch [0]	 Average validation loss 2.3010	 Average validation accuracy 0.1126

Epoch [1][20]	 Batch [0][550]	 Training Loss 2.2925	 Accuracy 0.1300


  Output = 1 / (1 + np.exp(-Input))


Epoch [1][20]	 Batch [50][550]	 Training Loss 2.3004	 Accuracy 0.1208
Epoch [1][20]	 Batch [100][550]	 Training Loss 2.3018	 Accuracy 0.1138
Epoch [1][20]	 Batch [150][550]	 Training Loss 2.3015	 Accuracy 0.1140
Epoch [1][20]	 Batch [200][550]	 Training Loss 2.3011	 Accuracy 0.1160
Epoch [1][20]	 Batch [250][550]	 Training Loss 2.3012	 Accuracy 0.1156
Epoch [1][20]	 Batch [300][550]	 Training Loss 2.3013	 Accuracy 0.1141
Epoch [1][20]	 Batch [350][550]	 Training Loss 2.3015	 Accuracy 0.1130
Epoch [1][20]	 Batch [400][550]	 Training Loss 2.3014	 Accuracy 0.1129
Epoch [1][20]	 Batch [450][550]	 Training Loss 2.3014	 Accuracy 0.1129
Epoch [1][20]	 Batch [500][550]	 Training Loss 2.3015	 Accuracy 0.1127
Epoch [1]	 Average training loss 2.3015	 Average training accuracy 0.1123
Epoch [1]	 Average validation loss 2.3013	 Average validation accuracy 0.1126

Epoch [2][20]	 Batch [0][550]	 Training Loss 2.3108	 Accuracy 0.1000
Epoch [2][20]	 Batch [50][550]	 Training Loss 2.3019	 Accuracy 0.1133

### Test

In [None]:
# Evaluate the trained sigmoid MLP (presumably on the MNIST test split —
# verify against solver.test).
test(SigmoidMLP, criterion, mnist, batch_size, disp_freq)

# MLP with ReLU activation function 

Build and train an MLP with one hidden layer of 256 units, using the ReLU activation function and cross-entropy loss.

## Before executing the following code, you should implement **ReLULayer**.

### Build the model

In [None]:
# ReLU MLP: FC(784, 256) -> ReLU -> FC(256, 10).
ReLUMLP = Network()
for layer in (
    FCLayer(784, 256, init_std),
    ReLULayer(),
    FCLayer(256, 10, init_std),
):
    ReLUMLP.add(layer)

# Loss and optimizer objects for this model.
# NOTE(review): the training cell in this notebook passes `sgd`, not
# `optimizer`, to train() — confirm which one is meant to be used.
criterion = SoftmaxCrossEntropy()
optimizer = SGD(learning_rate, weight_decay)

### Train

In [None]:
# Train the ReLU MLP with plain SGD. train() returns three values, unpacked
# here as the model plus what are presumably its loss and accuracy histories
# (they are fed to plot_loss_and_acc later) — TODO confirm against solver.py.
ReLUMLP, ReLU_loss, ReLU_acc = train(ReLUMLP, criterion, sgd, mnist, max_epoch, batch_size, disp_freq)

### Test

In [None]:
# Evaluate the trained ReLU MLP (presumably on the MNIST test split —
# verify against solver.test).
test(ReLUMLP, criterion, mnist, batch_size, disp_freq)

## Plot

In [None]:
# Compare the two activations: each entry maps a curve label to the
# [loss, accuracy] pair returned by the corresponding train() call.
plot_loss_and_acc({'Sigmoid': [sigmoid_loss, sigmoid_acc], 'ReLU': [ReLU_loss, ReLU_acc]})

# SGD with momentum

## Before executing the following code, you should implement SGDwithMomentum

In [None]:
# ReLU MLP to be trained with SGD + momentum. It uses the same
# FC(784, 256) -> ReLU -> FC(256, 10) stack as ReLUMLP so that the final plot
# compares optimizers on an identical architecture. Without these layers the
# Network would be empty when handed to train().
ReLUMLP_momentum = Network()
ReLUMLP_momentum.add(FCLayer(784, 256, init_std))
ReLUMLP_momentum.add(ReLULayer())
ReLUMLP_momentum.add(FCLayer(256, 10, init_std))


## Train

In [None]:
# Train the momentum variant with the `sgd_momentum` optimizer. train() returns
# three values, unpacked here as the model plus what are presumably its loss
# and accuracy histories — TODO confirm against solver.py.
ReLUMLP_momentum, momentum_loss, momentum_acc = train(ReLUMLP_momentum, criterion, sgd_momentum, mnist, max_epoch, batch_size, disp_freq)

### Test

In [None]:
# Evaluate the momentum-trained ReLU MLP (presumably on the MNIST test split —
# verify against solver.test).
test(ReLUMLP_momentum, criterion, mnist, batch_size, disp_freq)

In [None]:
# Final comparison of all three runs: each entry maps a curve label to the
# [loss, accuracy] pair returned by the corresponding train() call.
plot_loss_and_acc({'Sigmoid': [sigmoid_loss, sigmoid_acc],
                   'ReLU': [ReLU_loss, ReLU_acc], 
                   'ReLU_Momentum': [momentum_loss, momentum_acc]})