## **Configuration**

In [None]:
import numpy as np
import cv2
import os
from google.colab import drive
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import datasets, layers, models, losses
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
import pandas as pd
from tabulate import tabulate

In [None]:
# Mount Google Drive into the Colab filesystem so the dataset folder under
# /content/drive can be read by the loading cell below.
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## **Loading dataset**

In [None]:
# Root folder of the Extended Yale B dataset on the mounted Google Drive
path ="/content/drive/My Drive/ExtendedYaleB"
# Grayscale face images, each resized to 64x64
images = []
# Zero-based subject label for each entry of `images`
labels = []

# Loop over the subject folders yaleB01 ... yaleB38.
# NOTE(review): the range stops at 38, so a yaleB39 folder (if present) is not
# loaded; loading it would produce label 38, which the 38-unit output layers
# used later in this notebook cannot represent -- confirm the intended subjects.
for i in range(1, 39):
    # Folder 14 does not exist, so skip it. Rebinding the loop variable inside
    # a `for ... in range()` loop does not advance the iteration; the previous
    # `i = i + 1` made yaleB15 be read twice and duplicated all of its images.
    if i == 14:
        continue
    # Folder names are zero-padded to two digits (yaleB01, ..., yaleB38)
    subdirPath = os.path.join(path, f'yaleB{i:02d}')
    # os.listdir returns entries in arbitrary order; sort for a reproducible dataset order
    for filename in sorted(os.listdir(subdirPath)):
        # Folders also contain non-image files, so keep only the .pgm images
        if not filename.endswith('.pgm'):
            continue
        imgPath = os.path.join(subdirPath, filename)
        img = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None for unreadable/corrupt files; skip them
        # instead of crashing inside cv2.resize
        if img is None:
            print(f'Skipping unreadable image: {imgPath}')
            continue
        # Resize the image to 64x64
        img = cv2.resize(img, (64, 64))
        images.append(img)
        # The last two characters before the first "_" in the filename are the
        # subject number; subtract 1 to obtain a zero-based label
        label = int(filename.split("_")[0][-2:]) - 1
        labels.append(label)


In [None]:
# Stack the list of 64x64 images into a single NumPy array (one image per
# entry along the first axis) so it can be split and fed to the models.
listToArray = np.array(images)


## **Splitting**

In [None]:
# Split the dataset into training 80%, validation 10%, and testing 10% sets.
# A fixed random_state makes the split (and therefore the reported metrics)
# reproducible across kernel restarts.

# First hold out 10% of all samples as the test set.
x_train, x_test, y_train, y_test = train_test_split(listToArray, labels, test_size=0.1, random_state=42)
# The remaining 90% is split 8:1, i.e. the validation share is 1/9 of the
# remainder, which equals 10% of the full dataset.
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=1/9, random_state=42)

In [None]:
# Rescale the pixel values of every split by 1/255 so they lie in [0, 1]
x_train, x_val, x_test = (pixels / 255.0 for pixels in (x_train, x_val, x_test))
# Turn the label containers of every split into NumPy arrays
y_train, y_val, y_test = (np.array(targets) for targets in (y_train, y_val, y_test))


In [None]:
# Give every split an explicit single-channel axis: (samples, 64, 64, 1)
x_train, x_val, x_test = (
    split.reshape((-1, 64, 64, 1)) for split in (x_train, x_val, x_test)
)

## **MLP Building**

In [None]:
# Build a Multilayer Perceptron model.
# The declared input shape includes the channel axis so that it matches the
# (64, 64, 1) arrays produced by the reshaping cell; Flatten still yields
# 64 * 64 * 1 = 4096 features, so the parameter count is unchanged.
mlpModel = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(64, 64, 1)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    # One softmax unit per class label (labels are zero-based integers below 38)
    tf.keras.layers.Dense(38, activation='softmax')
])

In [None]:
# Print the layer-by-layer output shapes and parameter counts of the MLP
mlpModel.summary()

Model: "sequential_43"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_30 (Flatten)        (None, 4096)              0         
                                                                 
 dense_79 (Dense)            (None, 128)               524416    
                                                                 
 dense_80 (Dense)            (None, 64)                8256      
                                                                 
 dense_81 (Dense)            (None, 38)                2470      
                                                                 
Total params: 535,142
Trainable params: 535,142
Non-trainable params: 0
_________________________________________________________________


## **CNN Building**

In [None]:
# Build a Convolutional Neural Network (CNN) model, one layer at a time
cnnModel = Sequential()

# Three convolution + max-pooling stages with 32, 64 and 128 filters
cnnModel.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 1)))
cnnModel.add(MaxPooling2D((2, 2)))
cnnModel.add(Conv2D(64, (3, 3), activation='relu'))
cnnModel.add(MaxPooling2D((2, 2)))
cnnModel.add(Conv2D(128, (3, 3), activation='relu'))
cnnModel.add(MaxPooling2D((2, 2)))

# Classifier head: flatten, then a 512-unit dense layer with dropout on both sides
cnnModel.add(Flatten())
cnnModel.add(Dropout(0.5))
cnnModel.add(Dense(512, activation='relu'))
cnnModel.add(Dropout(0.5))

# One softmax unit per class
cnnModel.add(Dense(38, activation='softmax'))

In [None]:
# Print the layer-by-layer output shapes and parameter counts of the CNN
cnnModel.summary()

Model: "sequential_44"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_27 (Conv2D)          (None, 62, 62, 32)        320       
                                                                 
 max_pooling2d_27 (MaxPoolin  (None, 31, 31, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_28 (Conv2D)          (None, 29, 29, 64)        18496     
                                                                 
 max_pooling2d_28 (MaxPoolin  (None, 14, 14, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_29 (Conv2D)          (None, 12, 12, 128)       73856     
                                                                 
 max_pooling2d_29 (MaxPoolin  (None, 6, 6, 128)      

## **Set Loss Function & Optimizer**

In [None]:
# Sparse categorical cross-entropy is used because the labels are integer
# class indices rather than one-hot vectors; accuracy is tracked as the metric.
mlpModel.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Sparse categorical cross-entropy is used because the labels are integer
# class indices rather than one-hot vectors; accuracy is tracked as the metric.
cnnModel.compile(optimizer="adam", loss=losses.SparseCategoricalCrossentropy(), metrics=["accuracy"])

## **Train model**

In [None]:
# Train the MLP with the default batch size (32) for 30 epochs, monitoring
# the validation split after every epoch.
# NOTE: `history` is reassigned by the CNN training cell further down.
history = mlpModel.fit(x_train, y_train, epochs=30 , validation_data=(x_val, y_val))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Train the CNN with the default batch size (32) for 30 epochs.
# Validation uses the validation split, like every other model in this
# notebook; the test split is kept unseen until the final evaluate() call so
# the reported test metrics stay an unbiased estimate.
history = cnnModel.fit(x_train, y_train, epochs=30, validation_data=(x_val, y_val))


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## **Evaluate model**

In [None]:
# Evaluate the MLP on the test split; returns the loss and the accuracy
# metric configured in compile()
mlpLoss, mlpAcc = mlpModel.evaluate(x_test, y_test)




In [None]:
# Evaluate the CNN on the test split; returns the loss and the accuracy
# metric configured in compile()
cnnLoss, cnnAcc = cnnModel.evaluate(x_test, y_test)



# **Bonus**

## **Data augmentation**

In [None]:
# Augmentation pipeline shared by the augmented MLP and CNN models below
dataAugmentation = tf.keras.Sequential()
# Random left-right mirroring
dataAugmentation.add(layers.RandomFlip("horizontal"))
# Random rotation with factor 0.03 (a fraction of a full turn per the Keras docs)
dataAugmentation.add(layers.RandomRotation(0.03))
# Disabled experiment, kept for reference:
#layers.RandomTranslation(height_factor = (-0.2,1), width_factor = (-0.2,1), fill_mode ="reflect"),
# Random contrast adjustment with (lower, upper) factor (0.1, 0.2), unseeded
dataAugmentation.add(layers.RandomContrast(factor=(0.1, 0.2), seed=None))

## **MLP With Data augmentation**

In [None]:
# Build a Multilayer Perceptron Model With Data Augmentation.
# The model input is declared explicitly as (64, 64, 1) to match the reshaped
# data. The previous `input_shape=(64, 64)` sat on Flatten, which is not the
# first layer here, so it did not define the model input and also omitted the
# channel axis.
mlpModelAug = tf.keras.models.Sequential([
    tf.keras.Input(shape=(64, 64, 1)),
    # Random flip / rotation / contrast layers defined in the augmentation cell
    dataAugmentation,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    # One softmax unit per class
    tf.keras.layers.Dense(38, activation='softmax')
])

In [None]:
# Compile, train and test the augmented MLP.
# Same optimizer, loss and metric as the non-augmented MLP so results are comparable.
mlpModelAug.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the MLP model on the training set (with data augmentation), default
# batch size and 30 epochs, validating on the validation split each epoch
mlpHistoryAug = mlpModelAug.fit(x_train, y_train, epochs=30 , validation_data=(x_val, y_val))

# Evaluate the augmented MLP on the held-out test split; returns loss and accuracy
mlpLossAug, mlpAccAug = mlpModelAug.evaluate(x_test, y_test)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## **CNN with data augmentation**

In [None]:
# Build the CNN with data augmentation in front of the convolutional stack.
# The model input is declared explicitly as (64, 64, 1). The previous
# `input_shape` argument sat on Conv2D, which is not the first layer here, so
# it did not define the model input.
cnnModelAug = Sequential([
    tf.keras.Input(shape=(64, 64, 1)),
    # Random flip / rotation / contrast layers defined in the augmentation cell
    dataAugmentation,
    # Same architecture as cnnModel: three conv + max-pool stages
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    # Classifier head with dropout around the 512-unit dense layer
    Flatten(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # One softmax unit per class
    Dense(38, activation='softmax')
])


In [None]:
# Compile, train and test the augmented CNN.
# Same optimizer, loss and metric as the non-augmented CNN so results are comparable.
cnnModelAug.compile(optimizer="adam", loss=losses.SparseCategoricalCrossentropy(), metrics=["accuracy"])
# Train the CNN model on the training set (with data augmentation) for 30
# epochs, validating on the validation split each epoch
cnnHistoryAug = cnnModelAug.fit(x_train, y_train,epochs=30, validation_data=(x_val, y_val))

# Evaluate the augmented CNN on the held-out test split; returns loss and accuracy
cnnLossAug, cnnAccAug = cnnModelAug.evaluate(x_test, y_test)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## **Displaying the data**

In [None]:
# Column titles: the model name followed by loss/accuracy without and with augmentation
headers = ['Model', 'Loss (without augmentation)', 'Accuracy (without augmentation)', 'Loss (with augmentation)', 'Accuracy (with augmentation)']

# One row per model; the pair of columns that does not apply to a model is
# filled with NaN
table_data = [
    ['MLP without augmentation', mlpLoss, mlpAcc] + [np.nan] * 2,
    ['MLP with augmentation'] + [np.nan] * 2 + [mlpLossAug, mlpAccAug],
    ['CNN without augmentation', cnnLoss, cnnAcc] + [np.nan] * 2,
    ['CNN with augmentation'] + [np.nan] * 2 + [cnnLossAug, cnnAccAug],
]

# Render the comparison as a plain-text table with the tabulate package
print(tabulate(table_data, headers=headers))



Model                       Loss (without augmentation)    Accuracy (without augmentation)    Loss (with augmentation)    Accuracy (with augmentation)
------------------------  -----------------------------  ---------------------------------  --------------------------  ------------------------------
MLP without augmentation                       0.994154                           0.735772                  nan                             nan
MLP with augmentation                        nan                                nan                           0.497098                        0.878049
CNN without augmentation                       0.176026                           0.97561                   nan                             nan
CNN with augmentation                        nan                                nan                           0.130791                        0.97561
