# Homework - 26.11.2018:
## State of the Art Neural Network Architectures

The purpose of this homework is to implement and evaluate the state-of-the-art (SOTA) architectures presented in the lecture.
However, you are encouraged to try your own layer module ideas.
Feel free to consult the [Keras source code](https://github.com/keras-team/keras-applications).




1. Based on the CNN modules presented in the lecture (e.g. VGG16, Inception, ResNet, Xception, DenseNet), come up with your own CNN module and write a short text discussing your idea and the motivation behind the module.

2. Evaluate your module using the Keras CIFAR10 dataset splits (The model with the best test accuracy will present their solution to the class).

In [1]:
from tensorflow.keras.datasets import cifar10

# Fetch CIFAR-10 and unpack its (images, labels) pairs for train and test.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [2]:
import numpy as np
# Report the array shape of every CIFAR-10 split (train first, then test).
for split_array in (x_train, y_train, x_test, y_test):
    print(np.shape(split_array))
# Optional debugging output, disabled by default:
# print(x_train[0, :, :, :])  # a single image; last axis holds the 3 RGB channels
# print(y_train)


(50000, 32, 32, 3)
(50000, 1)
(10000, 32, 32, 3)
(10000, 1)
[[6]
 [9]
 [9]
 ...
 [9]
 [1]
 [1]]


In [3]:
# One-hot encode the integer class labels (10 classes) so that they match
# the softmax output of the network.
from tensorflow.keras import utils
y_train_categorical, y_test_categorical = (
    utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)

In [4]:
# Hyperparameters shared by the model definition below.
classes_number = 10       # width of the final softmax layer
img_shape = (32, 32, 3)   # per-image input shape: 32x32 pixels, 3 (RGB) channels

In [5]:
#MODEL: AlexNet
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization

# AlexNet-style network described as data: one entry per convolutional stage,
# given as (filters, kernel size, followed by 2x2 max pooling?).
# NOTE: 96 filters with an 11x11 kernel in the first stage may be too large
# for 32x32 images.
conv_stages = [
    (96, (11, 11), True),    # Layer 1
    (256, (5, 5), True),     # Layer 2
    (384, (3, 3), True),     # Layer 3
    (384, (3, 3), False),    # Layer 4 (no pooling)
    (256, (3, 3), True),     # Layer 5
]

alexnet = Sequential()

# Convolutional stages: Conv -> BatchNorm -> ReLU (-> MaxPool).
for stage_index, (filters, kernel_size, pooled) in enumerate(conv_stages):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': img_shape} if stage_index == 0 else {}
    alexnet.add(Conv2D(filters, kernel_size, padding='same', **first_layer_kwargs))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('relu'))
    if pooled:
        alexnet.add(MaxPooling2D(pool_size=(2, 2)))

alexnet.add(Flatten())

# Layers 6 and 7 - fully connected: Dense -> BatchNorm -> ReLU -> Dropout.
for units in (4096, 4096):
    alexnet.add(Dense(units))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('relu'))
    alexnet.add(Dropout(0.5))

# Layer 8 - classifier head: Dense -> BatchNorm -> softmax.
alexnet.add(Dense(classes_number))
alexnet.add(BatchNormalization())
alexnet.add(Activation('softmax'))

alexnet.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 96)        34944     
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 96)        384       
_________________________________________________________________
activation (Activation)      (None, 32, 32, 96)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 96)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 256)       614656    
_________________________________________________________________
batch_normalization_1 (Batch (None, 16, 16, 256)       1024      
_________________________________________________________________
activation_1 (Activation)    (None, 16, 16, 256)       0         
__________

In [8]:
# Configure the optimisation: Adam on categorical cross-entropy, tracking accuracy.
alexnet.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for a single epoch; the CIFAR-10 test split is passed as validation data.
fit_options = dict(
    batch_size=1000,
    epochs=1,
    verbose=1,
    validation_data=(x_test, y_test_categorical),
)
alexnet.fit(x_train, y_train_categorical, **fit_options)

Train on 50000 samples, validate on 10000 samples
Epoch 1/1
 1000/50000 [..............................] - ETA: 1:01:08 - loss: 2.7845 - acc: 0.0780

KeyboardInterrupt: 

3. Evaluate your module using the FERPlus dataset (The model with the best test accuracy will present their solution to the class).

    3.1 Download the [FER2013 dataset](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data) (images_path).
    
    3.2 Download the [FERPlus labels](https://github.com/Microsoft/FERPlus/blob/master/fer2013new.csv) (labels_path).
    
    3.3 Use the following code snippet to load the dataset, giving the appropriate paths to the csv files downloaded in 3.1 and 3.2:

In [None]:
import pandas as pd
import numpy as np
import cv2

In [None]:
class FERPlus(object):
    """Loader for the FER2013 [1] emotion classification images combined
    with the FERPlus [2] labels.

    [1] kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge
    [2] github.com/Microsoft/FERPlus

    Arguments:
        images_path: path to the FER2013 csv file; its ``pixels`` and
            ``Usage`` columns are read.
        labels_path: path to the FERPlus csv file; its ``Usage`` column and
            the columns at positions 2..9 are read (presumably the per-class
            vote counts in ``class_names`` order - verify against the csv).
        split: one of ``'train'``, ``'val'`` or ``'test'``.
        image_size: target size handed to ``cv2.resize`` as ``dsize``,
            i.e. ``(width, height)``.
        dataset_name: free-form name stored on the instance.
    """

    # Shape of every face as stored in the ``pixels`` column of the csv.
    _SOURCE_SHAPE = (48, 48)

    def __init__(self, images_path, labels_path, split='train', image_size=(48, 48),
                 dataset_name='FERPlus'):

        self.split = split
        self.image_size = image_size
        self.dataset_name = dataset_name
        self.images_path = images_path
        self.labels_path = labels_path
        self.class_names = ['neutral', 'happiness', 'surprise', 'sadness',
                            'anger', 'disgust', 'fear', 'contempt']
        self.num_classes = len(self.class_names)
        # Lookup tables between class index and class name (both directions).
        self.arg_to_name = dict(zip(range(self.num_classes), self.class_names))
        self.name_to_arg = dict(zip(self.class_names, range(self.num_classes)))
        # Maps the short split name to the value used in the csv ``Usage`` column.
        self._split_to_filter = {
            'train': 'Training', 'val': 'PublicTest', 'test': 'PrivateTest'}

    def load_data(self):
        """Read the selected split from both csv files.

        Both files are filtered by the same ``Usage`` value and are assumed
        to list the same samples in the same order, because the label-based
        mask is applied to the faces positionally.

        Returns:
            A ``(faces, emotions)`` tuple. ``faces`` is a float array of
            shape ``(samples, height, width, 1)``. ``emotions`` holds, per
            sample, the selected label columns divided by their row sum.
            Samples whose label columns sum to zero are dropped from both.

        Raises:
            KeyError: if ``self.split`` is not a known split name.
        """
        filter_name = self._split_to_filter[self.split]
        width, height = self.image_size
        # Resizing to the native size is a plain copy, so skip it entirely.
        needs_resize = (height, width) != self._SOURCE_SHAPE

        images = pd.read_csv(self.images_path)
        pixel_sequences = images.loc[images.Usage == filter_name, 'pixels']
        faces = []
        for pixel_sequence in pixel_sequences:
            # Let numpy convert the whitespace-separated values in one go.
            face = np.array(pixel_sequence.split(), dtype=float)
            face = face.reshape(self._SOURCE_SHAPE)
            if needs_resize:
                face = cv2.resize(face, (width, height))
            faces.append(face)
        if faces:
            faces = np.asarray(faces)
        else:
            # Keep a well-formed (0, height, width) array for an empty split.
            faces = np.empty((0, height, width))
        faces = np.expand_dims(faces, -1)

        labels = pd.read_csv(self.labels_path)
        votes = labels[labels.Usage == filter_name].iloc[:, 2:10].values
        totals = np.sum(votes, axis=1)
        # Rows without any count cannot be normalised; drop them everywhere.
        mask = totals != 0
        totals, faces, votes = totals[mask], faces[mask], votes[mask]
        emotions = votes / np.expand_dims(totals, 1)
        return faces, emotions