# Celebrity Look a Like CNN DL Project

#### Mohamed Youssef - 211001821


## Index

* [Including Necessary Libraries](#libs)
<br>
    * [Other Utilites](#uti)
<br>
* [Data Preprocessing](#dataset)
<br>
    * [Reading Meta Data](#metadata)
    * [Creating Data Frame](#dataframe)

## Including Necessary Libraries and Initializing Utilites<a id='libs'></a>


In [19]:
# Python built-in libraries
from datetime import datetime, timedelta
import time
import warnings
import os

# Libraries for preprocessing and visualizing the data
import scipy.io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# OpenCV library for image processing
import cv2

# Tensorflow and Keras libraries for deep learning models
import tensorflow as tf

import keras
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import (
    Dense,
    Activation,
    Dropout,
    Flatten,
    Input,
    Convolution2D,
    ZeroPadding2D,
    MaxPooling2D,
    Activation,
)
from keras.layers import Conv2D, AveragePooling2D
from keras.models import Model, Sequential
from keras import metrics
from keras.models import model_from_json


# Sklearn train test split function
from sklearn.model_selection import train_test_split

### Other Utilites<a id='uti'></a>

In [20]:
# Disabling warnings
def warn(*args, **kwargs):
    pass


warnings.warn = warn

In [21]:
# Setting the path to the current directory
path = os.getcwd()

In [22]:
# Configuring the GPU

# Setting the second GPU as the default GPU for training

# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Setting the configuration of the GPU

gpus = tf.config.experimental.list_physical_devices("gpus")

print("Num GPUs Available: ", gpus)


if gpus:
    try:
        tf.config.experimental.set_visible_devices(gpus[1], "GPU")

        tf.config.experimental.set_memory_growth(gpus[1], True)

    except RuntimeError as e:
        print(e)

Num GPUs Available:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


IndexError: list index out of range

## Data Preprocessing <a id='dataset'></a>

### Reading Meta Data <a id='metadata'></a>

In [None]:
# Loading the mat file containing the data
data_mat = scipy.io.loadmat(path + "/Data/imdb_crop/imdb_crop/imdb.mat")

In [None]:
# Getting the total number of records in the data
records = data_mat["imdb"][0][0][0].shape[1]
print("Total no. of records: ", records)

Total no. of records:  460723


In [None]:
# Getting the column names of the data
features = data_mat["imdb"][0][0].dtype
features = list(features.names)
print("Total no. of features: ", len(features))

Total no. of features:  10


In [None]:
# Rename the columns of the data
print("Current Features Names:\n", features)
features[0] = "Date of Birth"
features[1] = "Photo Taken Date"
features[2] = "Full Img Path"
features[3] = "Gender"
features[4] = "Celebrity Name"
features[5] = "Face Location"
features[6] = "Face Score"
features[7] = "Second Face Score"
features[8] = "Celeb Names"
features[9] = "Celeb ID"
print("New Features Names:\n", features)

Current Features Names:
 ['dob', 'photo_taken', 'full_path', 'gender', 'name', 'face_location', 'face_score', 'second_face_score', 'celeb_names', 'celeb_id']
New Features Names:
 ['Date of Birth', 'Photo Taken Date', 'Full Img Path', 'Gender', 'Celebrity Name', 'Face Location', 'Face Score', 'Second Face Score', 'Celeb Names', 'Celeb ID']


### Creating Data Frame <a id='dataframe'></a>

In [None]:
# Creating a dataframe to store the data
data_df = pd.DataFrame(index=(range(0, records)), columns=features)

In [None]:
# Extracting the data from the mat file and storing it in a dataframe
for i in data_mat:
    if i == "imdb":
        current_array = data_mat[i][0][0]
        for j in range(len(current_array)):
            data_df[features[j]] = pd.DataFrame(current_array[j][0])

In [None]:
# Displaying 5 random rows of the data
data_df.sample(5)

Unnamed: 0,Date of Birth,Photo Taken Date,Full Img Path,Gender,Celebrity Name,Face Location,Face Score,Second Face Score,Celeb Names,Celeb ID
358457,725308,2010,[79/nm0069079_rm2437731328_1985-10-28_2010.jpg],0.0,[Troian Bellisario],"[[549.7480128185821, 189.6321294063876, 703.08...",1.311452,1.300156,,19395
168715,715873,1997,[23/nm0005323_rm2796722944_1959-12-29_1997.jpg],0.0,[Paula Poundstone],"[[113.87, 66.67, 198.24, 151.04000000000002]]",3.728143,0.889193,,15231
111586,711968,2011,[48/nm0001448_rm2745891072_1949-4-20_2011.jpg],0.0,[Jessica Lange],"[[636.7705172859768, 444.71198880416983, 768.2...",3.774291,3.53872,,9062
425516,712200,2003,[00/nm0583600_rm3817969920_1949-12-8_2003.jpg],,[Nancy Meyers],"[[319.0377571614311, 91.63221633183745, 386.58...",3.919926,3.770319,,14233
74945,713732,2005,[23/nm0000623_rm3575355904_1954-2-17_2005.jpg],0.0,[Rene Russo],"[[62.54193194689745, 145.12850787609406, 247.7...",2.311964,,,15968


In [None]:
# Describing the data
data_df.describe()

  sqr = _ensure_numeric((avg - values) ** 2)


Unnamed: 0,Date of Birth,Photo Taken Date,Gender,Face Score,Second Face Score,Celeb ID
count,460723.0,460723.0,452261.0,460723.0,213797.0,460723.0
mean,718987.731774,2005.461555,0.581996,-inf,2.452904,10116.802404
std,13253.963535,9.054475,0.493231,,1.064432,5742.153266
min,47.0,1961.0,0.0,-inf,0.730926,1.0
25%,716370.0,2004.0,0.0,1.757891,1.583692,5294.0
50%,719935.0,2008.0,1.0,2.980097,2.355163,10066.0
75%,723073.0,2011.0,1.0,4.006376,3.228071,14922.0
max,734963.0,2015.0,1.0,7.381689,6.395435,20284.0
