# HW: X-ray images classification
--------------------------------------

Before you begin, open Mobaxterm and connect to triton with the user and password you were give with. Activate the environment `2ndPaper` and then type the command `pip install scikit-image`.

In this assignment you will be dealing with classification of 32X32 X-ray images of the chest. The image can be classified into one of four options: lungs (l), clavicles (c), and heart (h) and background (b). Even though those labels are dependent, we will treat this task as multiclass and not as multilabel. The dataset for this assignment is located on a shared folder on triton (`/MLdata/MLcourse/X_ray/'`).

In [84]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"]="2"
import numpy as np
from tensorflow.keras.layers import Dense, MaxPool2D, Conv2D, Dropout
from tensorflow.keras.layers import Flatten, InputLayer
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import *

from tensorflow.keras.initializers import Constant
from tensorflow.keras.datasets import fashion_mnist
import tensorflow.keras.backend as K
from tensorflow.keras import regularizers
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import *
from skimage.io import imread

from skimage.transform import rescale, resize, downscale_local_mean
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)


In [85]:
import tensorflow as tf
config = tf.compat.v1.ConfigProto(gpu_options =
                         tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.2)
# device_count = {'GPU': 1}
)
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(session)

In [86]:
def preprocess(datapath):
    # This part reads the images
    classes = ['b','c','l','h']
    imagelist = [fn for fn in os.listdir(datapath)]
    N = len(imagelist)
    num_classes = len(classes)
    images = np.zeros((N, 32, 32, 1))
    Y = np.zeros((N,num_classes))
    ii=0
    for fn in imagelist:

        src = imread(os.path.join(datapath, fn),1)
        img = resize(src,(32,32),order = 3)
        
        images[ii,:,:,0] = img
        cc = -1
        for cl in range(len(classes)):
            if fn[-5] == classes[cl]:
                cc = cl
        Y[ii,cc]=1
        ii += 1

    BaseImages = images
    BaseY = Y
    return BaseImages, BaseY

In [87]:
def preprocess_train_and_val(datapath):
    # This part reads the images
    classes = ['b','c','l','h']
    imagelist = [fn for fn in os.listdir(datapath)]
    N = len(imagelist)
    num_classes = len(classes)
    images = np.zeros((N, 32, 32, 1))
    Y = np.zeros((N,num_classes))
    ii=0
    for fn in imagelist:

        images[ii,:,:,0] = imread(os.path.join(datapath, fn),1)
        cc = -1
        for cl in range(len(classes)):
            if fn[-5] == classes[cl]:
                cc = cl
        Y[ii,cc]=1
        ii += 1

    return images, Y

In [88]:
#Loading the data for training and validation:
src_data = '/MLdata/MLcourse/X_ray/'
train_path = src_data + 'train'
val_path = src_data + 'validation'
test_path = src_data + 'test'
BaseX_train , BaseY_train = preprocess_train_and_val(train_path)
BaseX_val , BaseY_val = preprocess_train_and_val(val_path)
X_test, Y_test = preprocess(test_path)

In [89]:
keras.backend.clear_session()

### PART 1: Fully connected layers 
--------------------------------------

---
<span style="color:red">***Task 1:***</span> *NN with fully connected layers. 

Elaborate a NN with 2 hidden fully connected layers with 300, 150 neurons and 4 neurons for classification. Use ReLU activation functions for the hidden layers and He_normal for initialization. Don't forget to flatten your image before feedforward to the first dense layer. Name the model `model_relu`.*

---

In [90]:
#--------------------------Impelment your code here:-------------------------------------
model_relu = Sequential(name="model_relu")
model_relu.add(Flatten(input_shape=(32, 32, 1)))
model_relu.add(Dense(300, activation='relu', kernel_initializer='he_normal'))
model_relu.add(Dense(150, activation='relu', kernel_initializer='he_normal'))
model_relu.add(Dense(4, activation='softmax'))
#----------------------------------------------------------------------------------------

In [91]:
model_relu.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 300)               307500    
_________________________________________________________________
dense_1 (Dense)              (None, 150)               45150     
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 604       
Total params: 353,254
Trainable params: 353,254
Non-trainable params: 0
_________________________________________________________________


In [92]:
#Inputs: 
input_shape = (32,32,1)
learn_rate = 1e-5
decay = 0
batch_size = 64
epochs = 25

#Define your optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)


Compile the model with the optimizer above, accuracy metric and adequate loss for multiclass task. Train your model on the training set and evaluate the model on the testing set. Print the accuracy and loss over the testing set.

In [93]:
#--------------------------Impelment your code here:-------------------------------------
model_relu.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=AdamOpt)
history = model_relu.fit(BaseX_train , BaseY_train,
          batch_size=batch_size, epochs=epochs,
          verbose=2,
          validation_data=(BaseX_val , BaseY_val))
loss_and_metrics = model_relu.evaluate(X_test, Y_test)
print("Test Loss is {:.2f} ".format(loss_and_metrics[0]))
print("Test Accuracy is {:.2f} %".format(100*loss_and_metrics[1]))
#----------------------------------------------------------------------------------------

Train on 6474 samples, validate on 1728 samples
Epoch 1/25
 - 1s - loss: 1.3709 - acc: 0.3826 - val_loss: 1.2140 - val_acc: 0.5185
Epoch 2/25
 - 1s - loss: 1.1434 - acc: 0.5465 - val_loss: 1.0841 - val_acc: 0.5637
Epoch 3/25
 - 1s - loss: 1.0429 - acc: 0.6023 - val_loss: 1.0066 - val_acc: 0.6406
Epoch 4/25
 - 1s - loss: 0.9768 - acc: 0.6750 - val_loss: 0.9506 - val_acc: 0.6956
Epoch 5/25
 - 1s - loss: 0.9245 - acc: 0.7098 - val_loss: 0.9061 - val_acc: 0.7251
Epoch 6/25
 - 1s - loss: 0.8831 - acc: 0.7255 - val_loss: 0.8702 - val_acc: 0.7442
Epoch 7/25
 - 1s - loss: 0.8484 - acc: 0.7386 - val_loss: 0.8390 - val_acc: 0.7459
Epoch 8/25
 - 1s - loss: 0.8183 - acc: 0.7471 - val_loss: 0.8136 - val_acc: 0.7517
Epoch 9/25
 - 1s - loss: 0.7913 - acc: 0.7597 - val_loss: 0.7887 - val_acc: 0.7552
Epoch 10/25
 - 1s - loss: 0.7660 - acc: 0.7654 - val_loss: 0.7633 - val_acc: 0.7656
Epoch 11/25
 - 1s - loss: 0.7425 - acc: 0.7742 - val_loss: 0.7431 - val_acc: 0.7743
Epoch 12/25
 - 1s - loss: 0.7215 - ac

<span style="color:red">***Task 2:***</span> *Activation functions.* 

Change the activation functions to LeakyRelu or tanh or sigmoid. Name the new model `new_a_model`. Explain how it can affect the model.*

---
**The activation function that we choose affects the output of each neuron and thus the output of the network.
When we are using ReLU it can cause  the  gradient to vanish since it can get the value 0 , so adding a small slope of 0.01 instead of zero (Leaky ReLU) will fix the problem**

In [94]:
#--------------------------Impelment your code here:-------------------------------------
new_a_model = Sequential(name="new_a_model")
new_a_model.add(Flatten(input_shape=(32, 32, 1)))
new_a_model.add(Dense(300, kernel_initializer='he_normal'))
new_a_model.add(LeakyReLU())
new_a_model.add(Dense(150, kernel_initializer='he_normal'))
new_a_model.add(LeakyReLU())
new_a_model.add(Dense(4, activation='softmax'))
#----------------------------------------------------------------------------------------


In [95]:
new_a_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 1024)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 300)               307500    
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 300)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 150)               45150     
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 150)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 4)                 604       
Total params: 353,254
Trainable params: 353,254
Non-trainable params: 0
_________________________________________________________________


---
<span style="color:red">***Task 3:***</span> *Number of epochs.* 

Train the new model using 25 and 40 epochs. What difference does it makes in term of performance? Remember to save the compiled model for having initialized weights for every run as we did in tutorial 12. Evaluate each trained model on the test set*

---
**The number of epochs defines the number of time  my algorithm will pass through my entire training data,Such that in  each epoch the weights are updated.
The bigger the number of epochs the more the algorithm will run over the training set to reduce the error so the accuracy will increased on the training set and if the number of epochs is too high it will lead to overfitting .
When it’s lower it will lead to underfitting since the model didn’t learn enough.**


In [96]:
#Inputs: 
input_shape = (32,32,1)
learn_rate = 1e-5
decay = 0
batch_size = 64
epochs = 25

#Defining the optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)



In [97]:
#--------------------------Impelment your code here:-------------------------------------
new_a_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=AdamOpt)
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "new_a_model_25"
model_path = os.path.join(save_dir, model_name)
new_a_model.save(model_path)
print('Saved initialized model at %s ' % model_path)

history = new_a_model.fit(BaseX_train , BaseY_train,
          batch_size=batch_size, epochs=epochs,
          verbose=2,
          validation_data=(BaseX_val , BaseY_val))
loss_and_metrics = new_a_model.evaluate(X_test, Y_test)
print("Test Loss is {:.2f} ".format(loss_and_metrics[0]))
print("Test Accuracy is {:.2f} %".format(100*loss_and_metrics[1]))
#----------------------------------------------------------------------------------------

Saved initialized model at results/new_a_model_25 
Train on 6474 samples, validate on 1728 samples
Epoch 1/25
 - 1s - loss: 1.2921 - acc: 0.4336 - val_loss: 1.1833 - val_acc: 0.5399
Epoch 2/25
 - 1s - loss: 1.1280 - acc: 0.5913 - val_loss: 1.0651 - val_acc: 0.6273
Epoch 3/25
 - 1s - loss: 1.0284 - acc: 0.6593 - val_loss: 0.9883 - val_acc: 0.6759
Epoch 4/25
 - 1s - loss: 0.9599 - acc: 0.6895 - val_loss: 0.9337 - val_acc: 0.7037
Epoch 5/25
 - 1s - loss: 0.9109 - acc: 0.7096 - val_loss: 0.8935 - val_acc: 0.7101
Epoch 6/25
 - 1s - loss: 0.8735 - acc: 0.7238 - val_loss: 0.8615 - val_acc: 0.7199
Epoch 7/25
 - 1s - loss: 0.8421 - acc: 0.7368 - val_loss: 0.8414 - val_acc: 0.7211
Epoch 8/25
 - 1s - loss: 0.8179 - acc: 0.7430 - val_loss: 0.8147 - val_acc: 0.7321
Epoch 9/25
 - 1s - loss: 0.7951 - acc: 0.7476 - val_loss: 0.7983 - val_acc: 0.7321
Epoch 10/25
 - 1s - loss: 0.7759 - acc: 0.7521 - val_loss: 0.7797 - val_acc: 0.7459
Epoch 11/25
 - 1s - loss: 0.7563 - acc: 0.7583 - val_loss: 0.7630 - va

In [98]:
#Inputs: 
input_shape = (32,32,1)
learn_rate = 1e-5
decay = 0
batch_size = 64
epochs = 40

#Defining the optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)



In [99]:
#--------------------------Impelment your code here:-------------------------------------
new_a_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=AdamOpt)
if not("results" in os.listdir()):
    os.mkdir("results")
save_dir = "results/"
model_name = "new_a_model_40"
model_path = os.path.join(save_dir, model_name)
new_a_model.save(model_path)
print('Saved initialized model at %s ' % model_path)

history = new_a_model.fit(BaseX_train , BaseY_train,
          batch_size=batch_size, epochs=epochs,
          verbose=2,
          validation_data=(BaseX_val , BaseY_val))
loss_and_metrics = new_a_model.evaluate(X_test, Y_test)
print("Test Loss is {:.2f} ".format(loss_and_metrics[0]))
print("Test Accuracy is {:.2f} %".format(100*loss_and_metrics[1]))
#----------------------------------------------------------------------------------------

Saved initialized model at results/new_a_model_40 
Train on 6474 samples, validate on 1728 samples
Epoch 1/40
 - 1s - loss: 0.5888 - acc: 0.8108 - val_loss: 0.6074 - val_acc: 0.7975
Epoch 2/40
 - 1s - loss: 0.5817 - acc: 0.8145 - val_loss: 0.6038 - val_acc: 0.7969
Epoch 3/40
 - 1s - loss: 0.5749 - acc: 0.8167 - val_loss: 0.5972 - val_acc: 0.7992
Epoch 4/40
 - 1s - loss: 0.5672 - acc: 0.8193 - val_loss: 0.5895 - val_acc: 0.8027
Epoch 5/40
 - 1s - loss: 0.5615 - acc: 0.8200 - val_loss: 0.5868 - val_acc: 0.8009
Epoch 6/40
 - 1s - loss: 0.5554 - acc: 0.8214 - val_loss: 0.5790 - val_acc: 0.8079
Epoch 7/40
 - 1s - loss: 0.5483 - acc: 0.8244 - val_loss: 0.5729 - val_acc: 0.8044
Epoch 8/40
 - 1s - loss: 0.5427 - acc: 0.8247 - val_loss: 0.5679 - val_acc: 0.8067
Epoch 9/40
 - 1s - loss: 0.5368 - acc: 0.8290 - val_loss: 0.5621 - val_acc: 0.8073
Epoch 10/40
 - 1s - loss: 0.5299 - acc: 0.8318 - val_loss: 0.5573 - val_acc: 0.8113
Epoch 11/40
 - 1s - loss: 0.5249 - acc: 0.8309 - val_loss: 0.5519 - va

---
<span style="color:red">***Task 4:***</span> *Mini-batches.* 

Build the `model_relu` again and run it with a batch size of 32 instead of 64. What are the advantages of the mini-batch vs. SGD?*

---
**As we learned in lecture 
Stochastic gradient descent (SGD): compute the gradient of the cost function for each training example.
 Mini-batch gradient descent: compute the gradient of the cost function for 𝑝 training examples and it’s a combination between SGD and BGD.
Mini batch has a better computational efficiency because instead of one observation we go over p observations .**


In [100]:
keras.backend.clear_session()

In [101]:
#--------------------------Impelment your code here:-------------------------------------
model_relu = Sequential(name="model_relu")
model_relu.add(Flatten(input_shape=(32, 32, 1)))
model_relu.add(Dense(300, activation='relu', kernel_initializer='he_normal'))
model_relu.add(Dense(150, activation='relu', kernel_initializer='he_normal'))
model_relu.add(Dense(4, activation='softmax'))
#----------------------------------------------------------------------------------------

In [102]:
batch_size = 32
epochs = 50

#Define your optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)


In [103]:
#--------------------------Impelment your code here:-------------------------------------
model_relu.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=AdamOpt)
history = model_relu.fit(BaseX_train , BaseY_train,
          batch_size=batch_size, epochs=epochs,
          verbose=2,
          validation_data=(BaseX_val , BaseY_val))
loss_and_metrics = model_relu.evaluate(X_test, Y_test)
print("Test Loss is {:.2f} ".format(loss_and_metrics[0]))
print("Test Accuracy is {:.2f} %".format(100*loss_and_metrics[1]))
#----------------------------------------------------------------------------------------

Train on 6474 samples, validate on 1728 samples
Epoch 1/50
 - 2s - loss: 1.2286 - acc: 0.4907 - val_loss: 1.1089 - val_acc: 0.6319
Epoch 2/50
 - 1s - loss: 1.0417 - acc: 0.6634 - val_loss: 0.9934 - val_acc: 0.7112
Epoch 3/50
 - 1s - loss: 0.9529 - acc: 0.7085 - val_loss: 0.9253 - val_acc: 0.7245
Epoch 4/50
 - 1s - loss: 0.8942 - acc: 0.7281 - val_loss: 0.8767 - val_acc: 0.7361
Epoch 5/50
 - 2s - loss: 0.8475 - acc: 0.7437 - val_loss: 0.8354 - val_acc: 0.7459
Epoch 6/50
 - 1s - loss: 0.8086 - acc: 0.7559 - val_loss: 0.8002 - val_acc: 0.7552
Epoch 7/50
 - 1s - loss: 0.7752 - acc: 0.7657 - val_loss: 0.7706 - val_acc: 0.7622
Epoch 8/50
 - 2s - loss: 0.7459 - acc: 0.7732 - val_loss: 0.7446 - val_acc: 0.7737
Epoch 9/50
 - 2s - loss: 0.7175 - acc: 0.7827 - val_loss: 0.7198 - val_acc: 0.7789
Epoch 10/50
 - 2s - loss: 0.6931 - acc: 0.7901 - val_loss: 0.6966 - val_acc: 0.7801
Epoch 11/50
 - 1s - loss: 0.6690 - acc: 0.7946 - val_loss: 0.6751 - val_acc: 0.7865
Epoch 12/50
 - 1s - loss: 0.6465 - ac

---
<span style="color:red">***Task 4:***</span> *Batch normalization.* 

Build the `new_a_model` again and add batch normalization layers. How does it impact your results?*

---
**Batch normalization goal is to do standarizaion  for the input of each layer,we didn't see a change in the accuracy**


In [104]:
keras.backend.clear_session()

In [105]:
#--------------------------Impelment your code here:-------------------------------------
new_a_model = Sequential(name="new_a_model")
new_a_model.add(Flatten(input_shape=(32, 32, 1)))

new_a_model.add(Dense(300, kernel_initializer='he_normal'))
new_a_model.add(BatchNormalization())
new_a_model.add(LeakyReLU())

new_a_model.add(Dense(150, kernel_initializer='he_normal'))
new_a_model.add(BatchNormalization())
new_a_model.add(LeakyReLU())
new_a_model.add(Dense(4, activation='softmax'))
#----------------------------------------------------------------------------------------


In [106]:
batch_size = 32
epochs = 50

#Define your optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)
#Compile the network: 


In [107]:
#Preforming the training by using fit 
#--------------------------Impelment your code here:-------------------------------------
new_a_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=AdamOpt)

history = new_a_model.fit(BaseX_train , BaseY_train,
          batch_size=batch_size, epochs=epochs,
          verbose=2,
          validation_data=(BaseX_val , BaseY_val))
loss_and_metrics = new_a_model.evaluate(X_test, Y_test)
print("Test Loss is {:.2f} ".format(loss_and_metrics[0]))
print("Test Accuracy is {:.2f} %".format(100*loss_and_metrics[1]))
#----------------------------------------------------------------------------------------

Train on 6474 samples, validate on 1728 samples
Epoch 1/50
 - 3s - loss: 1.0721 - acc: 0.5658 - val_loss: 0.9562 - val_acc: 0.6574
Epoch 2/50
 - 2s - loss: 0.7316 - acc: 0.7471 - val_loss: 0.6936 - val_acc: 0.7726
Epoch 3/50
 - 2s - loss: 0.6160 - acc: 0.7907 - val_loss: 0.5971 - val_acc: 0.8056
Epoch 4/50
 - 2s - loss: 0.5517 - acc: 0.8134 - val_loss: 0.5290 - val_acc: 0.8322
Epoch 5/50
 - 2s - loss: 0.5044 - acc: 0.8369 - val_loss: 0.4848 - val_acc: 0.8490
Epoch 6/50
 - 2s - loss: 0.4733 - acc: 0.8466 - val_loss: 0.4654 - val_acc: 0.8628
Epoch 7/50
 - 2s - loss: 0.4491 - acc: 0.8573 - val_loss: 0.4394 - val_acc: 0.8675
Epoch 8/50
 - 2s - loss: 0.4254 - acc: 0.8625 - val_loss: 0.4356 - val_acc: 0.8692
Epoch 9/50
 - 2s - loss: 0.4051 - acc: 0.8741 - val_loss: 0.4067 - val_acc: 0.8791
Epoch 10/50
 - 2s - loss: 0.3967 - acc: 0.8713 - val_loss: 0.4026 - val_acc: 0.8791
Epoch 11/50
 - 2s - loss: 0.3754 - acc: 0.8818 - val_loss: 0.3894 - val_acc: 0.8819
Epoch 12/50
 - 2s - loss: 0.3693 - ac

### PART 2: Convolutional Neural Network (CNN)
------------------------------------------------------------------------------------

---
<span style="color:red">***Task 1:***</span> *2D CNN.* 

Have a look at the model below and answer the following:
* How many layers does it have?    
**8 layers (5 convolutional layers and 3 fully connected layers)**
* How many filter in each layer?
**First convolutional layer 64
Second convolutional layer  128
Third convolutional layer128
Fourth convolutional layer256
Fifth convolutional layer, 256**
* Would the number of parmaters be similar to a fully connected NN?
**No the number of parameters in this network will be less versus the fully connected because the convolutional network structure allows to reduce the number of parameters**
* Is this specific NN performing regularization?
**Yes, there are L2 regularization,batch normalization  and dropout which are used to prevent overfitting**

---



In [108]:
def get_net(input_shape,drop,dropRate,reg):
    #Defining the network architecture:
    model = Sequential()
    model.add(Permute((1,2,3),input_shape = input_shape))
    model.add(Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_1',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_2',kernel_regularizer=regularizers.l2(reg)))
    if drop:    
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_3',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_4',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_5',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    #Fully connected network tail:      
    model.add(Dense(512, activation='elu',name='FCN_1')) 
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(Dense(128, activation='elu',name='FCN_2'))
    model.add(Dense(4, activation= 'softmax',name='FCN_3'))
    model.summary()
    return model

In [109]:
input_shape = (32,32,1)
learn_rate = 1e-5
decay = 1e-03
batch_size = 64
epochs = 25
drop = True
dropRate = 0.3
reg = 1e-2
NNet = get_net(input_shape,drop,dropRate,reg)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute (Permute)            (None, 32, 32, 1)         0         
_________________________________________________________________
Conv2D_1 (Conv2D)            (None, 32, 32, 64)        640       
_________________________________________________________________
dropout (Dropout)            (None, 32, 32, 64)        0         
_________________________________________________________________
batch_normalization_v1_2 (Ba (None, 32, 32, 64)        128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 64)        0         
_________________________________________________________________
Conv2D_2 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
dropout_1 (Dropout)          (None, 16, 16, 128)       0         
__________

In [110]:
NNet=get_net(input_shape,drop,dropRate,reg)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_1 (Permute)          (None, 32, 32, 1)         0         
_________________________________________________________________
Conv2D_1 (Conv2D)            (None, 32, 32, 64)        640       
_________________________________________________________________
dropout_6 (Dropout)          (None, 32, 32, 64)        0         
_________________________________________________________________
batch_normalization_v1_7 (Ba (None, 32, 32, 64)        128       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
Conv2D_2 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
dropout_7 (Dropout)          (None, 16, 16, 128)       0         
__________

In [111]:
from tensorflow.keras.optimizers import *
import os
from tensorflow.keras.callbacks import *

#Defining the optimizar parameters:
AdamOpt = Adam(lr=learn_rate,decay=decay)

#Compile the network: 
NNet.compile(optimizer=AdamOpt, metrics=['acc'], loss='categorical_crossentropy')

#Saving checkpoints during training:
Checkpath = os.getcwd()
Checkp = ModelCheckpoint(Checkpath, monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=True)

In [112]:
#Preforming the training by using fit 
# IMPORTANT NOTE: This will take a few minutes!
h = NNet.fit(x=BaseX_train, y=BaseY_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0, validation_data = (BaseX_val, BaseY_val), shuffle=True)
#NNet.save(model_fn)

Train on 6474 samples, validate on 1728 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [113]:
# NNet.load_weights('Weights_1.h5')

In [114]:
results = NNet.evaluate(X_test,Y_test)
print('test loss, test acc:', results)

test loss, test acc: [8.089081878662109, 0.38285714]


---
<span style="color:red">***Task 2:***</span> *Number of filters* 

Rebuild the function `get_net` to have as an input argument a list of number of filters in each layers, i.e. for the CNN defined above the input should have been `[64, 128, 128, 256, 256]`. Now train the model with the number of filters reduced by half. What were the results.

---

In [115]:
def get_net(input_shape,drop,dropRate,reg,filters_num):
    #Defining the network architecture:
    model = Sequential()
    model.add(Permute((1,2,3),input_shape = input_shape))
    model.add(Conv2D(filters=filters_num[0], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_1',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=filters_num[1], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_2',kernel_regularizer=regularizers.l2(reg)))
    if drop:    
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=filters_num[2], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_3',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=filters_num[3], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_4',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(Conv2D(filters=filters_num[4], kernel_size=(3,3), padding='same', activation='relu',name='Conv2D_5',kernel_regularizer=regularizers.l2(reg)))
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(BatchNormalization(axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    #Fully connected network tail:      
    model.add(Dense(512, activation='elu',name='FCN_1')) 
    if drop:
        model.add(Dropout(rate=dropRate))
    model.add(Dense(128, activation='elu',name='FCN_2'))
    model.add(Dense(4, activation= 'softmax',name='FCN_3'))
    model.summary()
    return model

filters_num=list(np.array([64, 128, 128, 256, 256])//2)
NNet=get_net(input_shape,drop,dropRate,reg,filters_num)
NNet.compile(optimizer=AdamOpt, metrics=['acc'], loss='categorical_crossentropy')

#Saving checkpoints during training:
Checkpath = os.getcwd()
Checkp = ModelCheckpoint(Checkpath, monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=True)

h = NNet.fit(x=BaseX_train, y=BaseY_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0, validation_data = (BaseX_val, BaseY_val), shuffle=True)

results = NNet.evaluate(X_test,Y_test)
print('test loss, test acc:', results)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_2 (Permute)          (None, 32, 32, 1)         0         
_________________________________________________________________
Conv2D_1 (Conv2D)            (None, 32, 32, 32)        320       
_________________________________________________________________
dropout_12 (Dropout)         (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_v1_12 (B (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
Conv2D_2 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
dropout_13 (Dropout)         (None, 16, 16, 64)        0         
__________

That's all folks! See you :)