# HW: X-ray images classification
--------------------------------------

### Edited by Bar Goldner & Daria Hasin

Before you begin, open Mobaxterm and connect to triton with the user and password you were give with. Activate the environment `2ndPaper` and then type the command `pip install scikit-image`.

In this assignment you will be dealing with classification of 32X32 X-ray images of the chest. The image can be classified into one of four options: lungs (l), clavicles (c), and heart (h) and background (b). Even though those labels are dependent, we will treat this task as multiclass and not as multilabel. The dataset for this assignment is located on a shared folder on triton (`/MLdata/MLcourse/X_ray/'`).

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import numpy as np
from tensorflow.keras.layers import Dense, MaxPool2D, Conv2D, Dropout
from tensorflow.keras.layers import Flatten, InputLayer
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import *

from tensorflow.keras.initializers import Constant
from tensorflow.keras.datasets import fashion_mnist
import tensorflow.keras.backend as K
from tensorflow.keras import regularizers
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import *
from skimage.io import imread

from skimage.transform import rescale, resize, downscale_local_mean
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import tensorflow as tf
config = tf.compat.v1.ConfigProto(gpu_options =
                         tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.2)
# device_count = {'GPU': 1}
)
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(session)

In [3]:
def preprocess(datapath):
    # This part reads the images
    classes = ['b','c','l','h']
    imagelist = [fn for fn in os.listdir(datapath)]
    N = len(imagelist)
    num_classes = len(classes)
    images = np.zeros((N, 32, 32, 1))
    Y = np.zeros((N,num_classes))
    ii=0
    for fn in imagelist:

        src = imread(os.path.join(datapath, fn),1)
        img = resize(src,(32,32),order = 3)
        
        images[ii,:,:,0] = img
        cc = -1
        for cl in range(len(classes)):
            if fn[-5] == classes[cl]:
                cc = cl
        Y[ii,cc]=1
        ii += 1

    BaseImages = images
    BaseY = Y
    return BaseImages, BaseY

In [4]:
def preprocess_train_and_val(datapath):
    # This part reads the images
    classes = ['b','c','l','h']
    imagelist = [fn for fn in os.listdir(datapath)]
    N = len(imagelist)
    num_classes = len(classes)
    images = np.zeros((N, 32, 32, 1))
    Y = np.zeros((N,num_classes))
    ii=0
    for fn in imagelist:

        images[ii,:,:,0] = imread(os.path.join(datapath, fn),1)
        cc = -1
        for cl in range(len(classes)):
            if fn[-5] == classes[cl]:
                cc = cl
        Y[ii,cc]=1
        ii += 1

    return images, Y

In [5]:
#Loading the data for training and validation:
src_data = '/MLdata/MLcourse/X_ray/'
train_path = src_data + 'train'
val_path = src_data + 'validation'
test_path = src_data + 'test'
BaseX_train , BaseY_train = preprocess_train_and_val(train_path)
BaseX_val , BaseY_val = preprocess_train_and_val(val_path)
X_test, Y_test = preprocess(test_path)

In [6]:
keras.backend.clear_session()

### PART 1: Fully connected layers 
--------------------------------------

---
<span style="color:red">***Task 1:***</span> *NN with fully connected layers. 

Elaborate a NN with 2 hidden fully connected layers with 300, 150 neurons and 4 neurons for classification. Use ReLU activation functions for the hidden layers and He_normal for initialization. Don't forget to flatten your image before feedforward to the first dense layer. Name the model `model_relu`.*

---

In [7]:
#--------------------------Impelment your code here:-------------------------------------
n_layers = 300
n_classes = 4
input_shape = (32,32,1)

model_relu = Sequential()
model_relu.add(Flatten(input_shape=input_shape))

model_relu.add(Dense(n_layers, activation='relu', kernel_initializer='he_normal'))
model_relu.add(Dense(n_layers/2, activation='relu', kernel_initializer='he_normal'))

model_relu.add(Dense(n_classes))
model_relu.add(Activation('softmax'))

#----------------------------------------------------------------------------------------

Instructions for updating:
Colocations handled automatically by placer.


In [8]:
model_relu.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 300)               307500    
_________________________________________________________________
dense_1 (Dense)              (None, 150)               45150     
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 604       
_________________________________________________________________
activation (Activation)      (None, 4)                 0         
Total params: 353,254
Trainable params: 353,254
Non-trainable params: 0
_________________________________________________________________


In [9]:
#Inputs: 
input_shape = (32,32,1)
learn_rate = 1e-5
decay = 0
batch_size = 64
epochs = 25

#Define your optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)


Compile the model with the optimizer above, accuracy metric and adequate loss for multiclass task. Train your model on the training set and evaluate the model on the testing set. Print the accuracy and loss over the testing set.

In [10]:
#--------------------------Impelment your code here:-------------------------------------
model_relu.compile(optimizer=AdamOpt, metrics=['accuracy'], loss='categorical_crossentropy')

# saving the initial weights
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "init_weigths_MLP_relu_task1.h5"
model_path = os.path.join(save_dir, model_name)
model_relu.save(model_path)
print('Saved initialized model at %s ' % model_path)

history = model_relu.fit(BaseX_train, BaseY_train, batch_size=batch_size, epochs=epochs, validation_split=0, validation_data = (BaseX_val, BaseY_val), shuffle=True)
loss_and_metrics = model_relu.evaluate(X_test, Y_test, verbose=2)

# saving the final weights
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "final_weights_MLP_relu_25_epochs.h5"
model_path = os.path.join(save_dir, model_name)
model_relu.save(model_path)
print('Saved trained model at %s ' % model_path)

print()
print('The loss is:', np.round(loss_and_metrics[0], 2))
print('The accuracy is:', np.round(loss_and_metrics[1], 2))
#----------------------------------------------------------------------------------------

Saved initialized model at results/init_weigths_MLP_relu_task1.h5 
Train on 6474 samples, validate on 1728 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
 - 0s - loss: 0.7856 - acc: 0.6971
Saved trained model at results/final_weights_MLP_relu_25_epochs.h5 

The loss is: 0.79
The accuracy is: 0.7


---
<span style="color:red">***Task 2:***</span> *Activation functions.* 

**Change the activation functions to LeakyRelu or tanh or sigmoid. Name the new model `new_a_model`. Explain how it can affect the model.**

We changed the activation functions to tanh and it can effect the model in some ways:
* In tanh we can see the vanishing gradient problem - the gradient can be very small and prevent a weight from changing its value, while ReLU avoids and rectifies this problem.
* The proccess can be slower. ReLu is less computationally expensive than tanh because it involves simpler mathematical operations.

---

In [11]:
#--------------------------Impelment your code here:-------------------------------------
n_layers = 300
n_classes = 4
input_shape = (32,32,1)

new_a_model = Sequential()
new_a_model.add(Flatten(input_shape=input_shape))

new_a_model.add(Dense(n_layers, activation='tanh', kernel_initializer='he_normal'))
new_a_model.add(Dense(n_layers/2, activation='tanh', kernel_initializer='he_normal'))

new_a_model.add(Dense(n_classes))
new_a_model.add(Activation('softmax'))
#----------------------------------------------------------------------------------------

In [12]:
new_a_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 1024)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 300)               307500    
_________________________________________________________________
dense_4 (Dense)              (None, 150)               45150     
_________________________________________________________________
dense_5 (Dense)              (None, 4)                 604       
_________________________________________________________________
activation_1 (Activation)    (None, 4)                 0         
Total params: 353,254
Trainable params: 353,254
Non-trainable params: 0
_________________________________________________________________


---
<span style="color:red">***Task 3:***</span> *Number of epochs.* 

**Train the new model using 25 and 40 epochs. What difference does it makes in term of performance? Remember to save the compiled model for having initialized weights for every run as we did in tutorial 12. Evaluate each trained model on the test set**

In term of performance, greater number of epochs will get better results in the training set (high accuracy), but we will risk over-fitting which result in low accuracy on the training set. 

---

In [13]:
#Inputs: 
input_shape = (32,32,1)
learn_rate = 1e-5
decay = 0
batch_size = 64
epochs = 25

#Defining the optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)

In [14]:
#--------------------------Impelment your code here:-------------------------------------
new_a_model.compile(optimizer=AdamOpt, metrics=['accuracy'], loss='categorical_crossentropy')

# saving the initial weights
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "init_weigths_MLP_tanh_task3.h5"
model_path = os.path.join(save_dir, model_name)
new_a_model.save(model_path)
print('Saved initialized model at %s ' % model_path)

history = new_a_model.fit(BaseX_train, BaseY_train, batch_size=batch_size, epochs=epochs, validation_split=0, validation_data = (BaseX_val, BaseY_val), shuffle=True)
loss_and_metrics = new_a_model.evaluate(X_test, Y_test, verbose=2)

# saving the final weights
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "final_weights_MLP_tanh_25_epochs.h5"
model_path = os.path.join(save_dir, model_name)
new_a_model.save(model_path)
print('Saved trained model at %s ' % model_path)

print()
print('The loss is:', np.round(loss_and_metrics[0], 2))
print('The accuracy is:', np.round(loss_and_metrics[1], 2))
#-----------------------------------------------------------------------------------------

Saved initialized model at results/init_weigths_MLP_tanh_task3.h5 
Train on 6474 samples, validate on 1728 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
 - 0s - loss: 0.8204 - acc: 0.6457
Saved trained model at results/final_weights_MLP_tanh_25_epochs.h5 

The loss is: 0.82
The accuracy is: 0.65


In [15]:
#Inputs: 
input_shape = (32,32,1)
learn_rate = 1e-5
decay = 0
batch_size = 64
epochs = 40
# epochs = 50  # check for task 4: Batch normalization question.

#Defining the optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)

In [16]:
#--------------------------Impelment your code here:-------------------------------------
from tensorflow.keras.models import load_model
new_a_model = load_model("results/init_weigths_MLP_tanh_task3.h5")

history = new_a_model.fit(BaseX_train, BaseY_train, batch_size=batch_size, epochs=epochs, validation_split=0, validation_data = (BaseX_val, BaseY_val), shuffle=True)
loss_and_metrics = new_a_model.evaluate(X_test, Y_test, verbose=2)

# saving the final weights
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "final_weights_MLP_tanh_40_epochs.h5"
model_path = os.path.join(save_dir, model_name)
new_a_model.save(model_path)
print('Saved trained model at %s ' % model_path)

print()
print('The loss is:', np.round(loss_and_metrics[0], 2))
print('The accuracy is:', np.round(loss_and_metrics[1], 2))
#-----------------------------------------------------------------------------------------

Train on 6474 samples, validate on 1728 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
 - 0s - loss: 0.8174 - acc: 0.6571
Saved trained model at results/final_weights_MLP_tanh_40_epochs.h5 

The loss is: 0.82
The accuracy is: 0.66


---
<span style="color:red">***Task 4:***</span> *Mini-batches.* 

**Build the `model_relu` again and run it with a batch size of 32 instead of 64. What are the advantages of the mini-batch vs. SGD?**

The advantages of mini-batch over SGD are:
* Higher accuracy
* Better computational efficiency 
* Mini-batch perform weight updates more often than SGD, so we achieve faster learning.

---

In [17]:
keras.backend.clear_session()

In [18]:
#--------------------------Impelment your code here:-------------------------------------
n_layers = 300
n_classes = 4
input_shape = (32,32,1)

model_relu = Sequential()
model_relu.add(Flatten(input_shape=input_shape))

model_relu.add(Dense(n_layers, activation='relu', kernel_initializer='he_normal'))
model_relu.add(Dropout(0.2))

model_relu.add(Dense(n_layers/2, activation='relu', kernel_initializer='he_normal'))
model_relu.add(Dropout(0.2))

model_relu.add(Dense(n_classes))
model_relu.add(Activation('softmax'))
#----------------------------------------------------------------------------------------

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [19]:
batch_size = 32
epochs = 50
learn_rate = 1e-5
decay = 0

#Define your optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)


In [20]:
#--------------------------Impelment your code here:-------------------------------------
model_relu.compile(optimizer=AdamOpt, metrics=['accuracy'], loss='categorical_crossentropy')

# saving the initial weights
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "init_weigths_MLP_relu_task4.h5"
model_path = os.path.join(save_dir, model_name)
model_relu.save(model_path)
print('Saved initialized model at %s ' % model_path)

history = model_relu.fit(BaseX_train, BaseY_train, batch_size=batch_size, epochs=epochs, validation_split=0, validation_data = (BaseX_val, BaseY_val), shuffle=True)
loss_and_metrics = model_relu.evaluate(X_test, Y_test, verbose=2)

# saving the final weights
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "final_weights_MLP_relu_50_epochs.h5"
model_path = os.path.join(save_dir, model_name)
model_relu.save(model_path)
print('Saved trained model at %s ' % model_path)

print()
print('The loss is:', np.round(loss_and_metrics[0], 2))
print('The accuracy is:', np.round(loss_and_metrics[1], 2))
#----------------------------------------------------------------------------------------

Saved initialized model at results/init_weigths_MLP_relu_task4.h5 
Train on 6474 samples, validate on 1728 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
 - 0s - loss: 0.8480 - acc: 0.6571
Saved trained model at results/final_weights_MLP_relu_50_epochs.h5 

The loss is: 0.85
The accuracy is: 0.66


---
<span style="color:red">***Task 4:***</span> *Batch normalization.* 

**Build the `new_a_model` again and add batch normalization layers. How does it impact your results?**

Batch normalization is a technique that standardizes the inputs to a layer for each mini-batch. This way, the model achieves convergence faster and stabilizing the learning process.


We can see the impact on our results in the accuracy metric (for 50 epochs): 

With batch normalization: 0.6971

Without batch normalization: 0.6629

---

In [21]:
keras.backend.clear_session()

In [22]:
#--------------------------Impelment your code here:-------------------------------------
n_layers = 300
n_classes = 4
input_shape = (32,32,1)

new_a_model = Sequential()
new_a_model.add(Flatten(input_shape=input_shape))

new_a_model.add(Dense(n_layers, activation='tanh', kernel_initializer='he_normal'))
new_a_model.add(BatchNormalization())
new_a_model.add(Dropout(0.2))

new_a_model.add(Dense(n_layers/2, activation='tanh', kernel_initializer='he_normal'))
new_a_model.add(BatchNormalization())
new_a_model.add(Dropout(0.2))

new_a_model.add(Dense(n_classes))
new_a_model.add(Activation('softmax'))
#---------------------------------------------------------------------------------------

In [23]:
batch_size = 32
epochs = 50
learn_rate = 1e-5
decay = 0

#Define your optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)
#Compile the network: 
new_a_model.compile(optimizer=AdamOpt, metrics=['accuracy'], loss='categorical_crossentropy')

In [24]:
#Preforming the training by using fit 
#--------------------------Impelment your code here:-------------------------------------
new_a_model.compile(optimizer=AdamOpt, metrics=['accuracy'], loss='categorical_crossentropy')

# saving the initial weights
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "init_weigths_MLP_tanh_task4.h5"
model_path = os.path.join(save_dir, model_name)
new_a_model.save(model_path)
print('Saved initialized model at %s ' % model_path)

history = new_a_model.fit(BaseX_train, BaseY_train, batch_size=batch_size, epochs=epochs, validation_split=0, validation_data = (BaseX_val, BaseY_val), shuffle=True)
loss_and_metrics = new_a_model.evaluate(X_test, Y_test, verbose=2)

# saving the final weights
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "final_weights_MLP_tanh_50_epochs_batch_norm.h5"
model_path = os.path.join(save_dir, model_name)
new_a_model.save(model_path)
print('Saved trained model at %s ' % model_path)

print()
print('The loss is:', np.round(loss_and_metrics[0], 2))
print('The accuracy is:', np.round(loss_and_metrics[1], 2))
#----------------------------------------------------------------------------------------

Saved initialized model at results/init_weigths_MLP_tanh_task4.h5 
Train on 6474 samples, validate on 1728 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
 - 0s - loss: 0.8642 - acc: 0.6857
Saved trained model at results/final_weights_MLP_tanh_50_epochs_batch_norm.h5 

The loss is: 0.86
The accuracy is: 0.69


### PART 2: Convolutional Neural Network (CNN)
------------------------------------------------------------------------------------

---
<span style="color:red">***Task 1:***</span> *2D CNN.* 

Have a look at the model below and answer the following:

**How many layers does it have?**
    
The model has 8 layers: 5 layers of 2D convolution and 3 layers of fully connected.
    
    
**How many filter in each layer?**
    
Conv2D_1: 64 filters

Conv2D_2: 128 filters

Conv2D_3: 128 filters

Conv2D_4: 256 filters

Conv2D_5: 256 filters
    

**Would the number of parmaters be similar to a fully connected NN?**
    
No, the number will be different because in a fully connected NN each neuron is connected to every neuron in the previous layer, and each connection has it's own weight (trainable parameter), while in 2D convolution each neuron is only connected to a few neurons in the previous layer, and the same set of weights is used for every neuron. 


**Is this specific NN performing regularization?**

Yes, the NN performing regularization:
* Each 2D convolution layer include regularization kernel (L2).
* If drop is True, we have 6 dropouts in the flow. Dropout is a regularization technique that zeros weights of some neurons in the layer randomly (the rate is defined by dropRate).

---

In [25]:
def get_net(input_shape,drop,dropRate,reg):
    #Defining the network architecture:
    model = Sequential()
    model.add(Permute((1,2,3),input_shape = input_shape))
    model.add(Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_1',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_2',kernel_regularizer=regularizers.l2(reg)))
    if drop:    
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_3',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_4',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_5',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    #Fully connected network tail:      
    model.add(Dense(512, activation='elu',name='FCN_1')) 
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(Dense(128, activation='elu',name='FCN_2'))
    model.add(Dense(4, activation= 'softmax',name='FCN_3'))
    model.summary()
    return model

In [26]:
input_shape = (32,32,1)
learn_rate = 1e-5
decay = 1e-03
batch_size = 64
epochs = 25
drop = True
dropRate = 0.3
reg = 1e-2
NNet = get_net(input_shape,drop,dropRate,reg)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute (Permute)            (None, 32, 32, 1)         0         
_________________________________________________________________
Conv2D_1 (Conv2D)            (None, 32, 32, 64)        640       
_________________________________________________________________
dropout_2 (Dropout)          (None, 32, 32, 64)        0         
_________________________________________________________________
batch_normalization_v1_2 (Ba (None, 32, 32, 64)        128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 64)        0         
_________________________________________________________________
Conv2D_2 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
dropout_3 (Dropout)          (None, 16, 16, 128)       0         
__________

In [27]:
NNet=get_net(input_shape,drop,dropRate,reg)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_1 (Permute)          (None, 32, 32, 1)         0         
_________________________________________________________________
Conv2D_1 (Conv2D)            (None, 32, 32, 64)        640       
_________________________________________________________________
dropout_8 (Dropout)          (None, 32, 32, 64)        0         
_________________________________________________________________
batch_normalization_v1_7 (Ba (None, 32, 32, 64)        128       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
Conv2D_2 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
dropout_9 (Dropout)          (None, 16, 16, 128)       0         
__________

In [28]:
from tensorflow.keras.optimizers import *
import os
from tensorflow.keras.callbacks import *

#Defining the optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)

#Compile the network: 
NNet.compile(optimizer=AdamOpt, metrics=['acc'], loss='categorical_crossentropy')

#Saving checkpoints during training:
# Checkpath = os.getcwd()
# Checkp = ModelCheckpoint(Checkpath, monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=True, save_freq=1)

In [29]:
#Preforming the training by using fit 
# IMPORTANT NOTE: This will take a few minutes!
h = NNet.fit(x=BaseX_train, y=BaseY_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0, validation_data = (BaseX_val, BaseY_val), shuffle=True)
#NNet.save(model_fn)

Train on 6474 samples, validate on 1728 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [30]:
# NNet.load_weights('Weights_1.h5')

In [31]:
results = NNet.evaluate(X_test,Y_test)
print('test loss, test acc:', results)

test loss, test acc: [7.7783691024780275, 0.32]


---
<span style="color:red">***Task 2:***</span> *Number of filters* 

Rebuild the function `get_net` to have as an input argument a list of number of filters in each layers, i.e. for the CNN defined above the input should have been `[64, 128, 128, 256, 256]`. Now train the model with the number of filters reduced by half. What were the results.

---

In [None]:
#--------------------------Impelment your code here:-------------------------------------
def get_net(filters_list, input_shape, drop, dropRate, reg):
    #Defining the network architecture:
    model = Sequential()
    model.add(Permute((1,2,3),input_shape = input_shape))
    model.add(Conv2D(filters=filters_list[0], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_1',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=filters_list[1], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_2',kernel_regularizer=regularizers.l2(reg)))
    if drop:    
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=filters_list[2], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_3',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=filters_list[3], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_4',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=filters_list[4], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_5',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    #Fully connected network tail:      
    model.add(Dense(512, activation='elu',name='FCN_1')) 
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(Dense(128, activation='elu',name='FCN_2'))
    model.add(Dense(4, activation= 'softmax',name='FCN_3'))
    model.summary()
    return model


input_shape = (32,32,1)
learn_rate = 1e-5
decay = 1e-03
batch_size = 64
epochs = 25
drop = True
dropRate = 0.3
reg = 1e-2
filters = [32, 64, 64, 128, 128]

NNet = get_net(filters, input_shape, drop, dropRate, reg)

AdamOpt = Adam(lr=learn_rate,decay=decay)
NNet.compile(optimizer=AdamOpt, metrics=['acc'], loss='categorical_crossentropy')

history = NNet.fit(x=BaseX_train, y=BaseY_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0, validation_data = (BaseX_val, BaseY_val), shuffle=True)
loss_and_metrics = NNet.evaluate(X_test,Y_test)
print()
print('The loss is:', np.round(loss_and_metrics[0], 2))
print('The accuracy is:', np.round(loss_and_metrics[1], 2))

#----------------------------------------------------------------------------------------

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_2 (Permute)          (None, 32, 32, 1)         0         
_________________________________________________________________
Conv2D_1 (Conv2D)            (None, 32, 32, 32)        320       
_________________________________________________________________
dropout_14 (Dropout)         (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_v1_12 (B (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
Conv2D_2 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
dropout_15 (Dropout)         (None, 16, 16, 64)        0         
__________

That's all folks! See you :)