# Import libraries

In [1]:
import pandas as pd
import numpy as np
import cv2
import glob
import matplotlib.pyplot as plt
import os
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sns
from PIL import Image

# Getting all the images path:

In [2]:
def importPath(folder):
    path = f"../OUTPUT/images/{folder}/*.jpg"
    list_paths = glob.glob(path)
    return list_paths

In [3]:
folders = ['0-2', '4-6', '8-12', '15-20', '21-35', '36-45', '46-59', '60-100']

faceFiles = []
    
for files in folders:
    ff = importPath(files)
    faceFiles.append(ff)

## Create a dictionary with label / images path:

In [5]:
pics = dict()
for ffiles in faceFiles:
    for foto in ffiles:
        label = foto.split("/")[-2]
        pic = foto
        if label in pics:
            pics[label].append(pic)
        else:
            pics[label] = [pic]

## Create a dataframe with the information label /images path:

In [7]:
im = []

for k,v in pics.items():
    for a in v:
        im.append([k,a])
faces_df =  pd.DataFrame(im, columns=["label","image_path"])

display(faces_df.head())

Unnamed: 0,label,image_path
0,0-2,../OUTPUT/images/0-2/image_669.jpg
1,0-2,../OUTPUT/images/0-2/image_1570.jpg
2,0-2,../OUTPUT/images/0-2/image_346.jpg
3,0-2,../OUTPUT/images/0-2/image_441.jpg
4,0-2,../OUTPUT/images/0-2/image_1055.jpg


In [8]:
print(faces_df.shape)
faces_df.label.value_counts()

(17462, 2)


21-35     5023
0-2       2491
36-45     2340
4-6       2140
8-12      2124
15-20     1642
60-100     872
46-59      830
Name: label, dtype: int64

# Extract face from image and convert them into arrays:

Import haarcascade:

In [9]:
face_cascade = cv2.CascadeClassifier('../INPUT/haarcascade_frontalface_default.xml')

## Convert images into B&W:

In [10]:
def BlackandWhite(image_path):
    im_bw = cv2.imread(image_path)
    im_bw = cv2.cvtColor(im_bw, cv2.COLOR_BGR2GRAY)
    return im_bw

In [11]:
dictionary={}
count = 0
for path in faces_df['image_path']:
    dictionary[count] = BlackandWhite(path)
    count+=1

In [13]:
def Extractface(array):
    try:
        faces = face_cascade.detectMultiScale(array, 1.25, 6)
        x,y,w,h = faces[0]
        face = array[y:y+h,x:x+w]
        face = cv2.resize(face,(48,48))
        return face
    except:
        return "No face was identified"

In [14]:
diction={}
for k,v in dictionary.items():
    diction.setdefault('clean_image', []).append(Extractface(v))

In [16]:
faces_df['arrays']= pd.DataFrame(diction)

## Delete the images which the face was not recognized:

In [19]:
faces_df.drop(faces_df[faces_df.arrays == "No face was identified"].index, inplace = True)

  result = libops.scalar_compare(x.ravel(), y, op)


In [20]:
print(faces_df.shape)
faces_df.head()

(13809, 3)


Unnamed: 0,label,image_path,arrays
0,0-2,../OUTPUT/images/0-2/image_669.jpg,"[[67, 82, 64, 48, 48, 50, 51, 50, 50, 62, 73, ..."
1,0-2,../OUTPUT/images/0-2/image_1570.jpg,"[[35, 33, 40, 42, 38, 58, 107, 141, 154, 135, ..."
4,0-2,../OUTPUT/images/0-2/image_1055.jpg,"[[21, 15, 17, 18, 17, 20, 17, 14, 16, 19, 23, ..."
5,0-2,../OUTPUT/images/0-2/image_1135.jpg,"[[26, 23, 27, 32, 64, 58, 71, 71, 81, 68, 93, ..."
6,0-2,../OUTPUT/images/0-2/image_231.jpg,"[[63, 65, 63, 57, 65, 64, 69, 66, 67, 69, 73, ..."


In [22]:
Faces_detected = 13809*100//17462
print(f"Faces_detected:{Faces_detected}%")

Faces_detected:79%


# Save the faces_detected images:

In [23]:
image_name = []

for a in list(faces_df.image_path):
    im_name = a.split("/")[-1]
    image_name.append(im_name)

In [24]:
faces_df['image_name'] = image_name

In [31]:
faces_df.head()

Unnamed: 0,label,image_path,image_name,arrays
0,0-2,../OUTPUT/faces_detected/0-2/image_669.jpg,image_669.jpg,"[[67, 82, 64, 48, 48, 50, 51, 50, 50, 62, 73, ..."
1,0-2,../OUTPUT/faces_detected/0-2/image_1570.jpg,image_1570.jpg,"[[35, 33, 40, 42, 38, 58, 107, 141, 154, 135, ..."
4,0-2,../OUTPUT/faces_detected/0-2/image_1055.jpg,image_1055.jpg,"[[21, 15, 17, 18, 17, 20, 17, 14, 16, 19, 23, ..."
5,0-2,../OUTPUT/faces_detected/0-2/image_1135.jpg,image_1135.jpg,"[[26, 23, 27, 32, 64, 58, 71, 71, 81, 68, 93, ..."
6,0-2,../OUTPUT/faces_detected/0-2/image_231.jpg,image_231.jpg,"[[63, 65, 63, 57, 65, 64, 69, 66, 67, 69, 73, ..."


In [27]:
# Saving in a new folder(faces_detected):

for index, row in faces_df.iterrows():
    img = Image.fromarray(row['arrays'])
    img.save("../OUTPUT/faces_detected/{}/{}".format(row['label'],row['image_name']))   

# Put all together into a final dataframe:

In [28]:
#update image_path with the new folder:

faces_df['image_path'] = faces_df['image_path'].str.replace('images', 'faces_detected', regex=True)

In [29]:
#Organize columns:

faces_df = faces_df[['label', 'image_path', 'image_name', 'arrays']]

In [30]:
#Transform the labels into numerical

df_label_faces = pd.get_dummies(faces_df['label'])

In [33]:
# Organize labels:

df_label_faces = df_label_faces[['0-2', '4-6', '8-12', '15-20', '21-35', '36-45', '46-59', '60-100']]

In [34]:
#Create column GT:

Face_list =[] 
  
for index, rows in df_label_faces.iterrows(): 
    my_list =[rows['0-2'], rows['4-6'], rows['8-12'], rows['15-20'], rows['21-35'], rows['36-45'], rows['46-59'], rows['60-100']] 
    Face_list.append(my_list)

In [35]:
df_label_faces['gt'] = Face_list

In [36]:
print(df_label_faces.shape)
df_label_faces.head()

(13809, 9)


Unnamed: 0,0-2,4-6,8-12,15-20,21-35,36-45,46-59,60-100,gt
0,1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"
1,1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"
4,1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"
5,1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"
6,1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"


In [37]:
# Merge both dataframes:

data_final = pd.concat([faces_df, df_label_faces], axis=1)

In [39]:
print(data_final.shape)
data_final.head()

(13809, 13)


Unnamed: 0,label,image_path,image_name,arrays,0-2,4-6,8-12,15-20,21-35,36-45,46-59,60-100,gt
0,0-2,../OUTPUT/faces_detected/0-2/image_669.jpg,image_669.jpg,"[[67, 82, 64, 48, 48, 50, 51, 50, 50, 62, 73, ...",1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"
1,0-2,../OUTPUT/faces_detected/0-2/image_1570.jpg,image_1570.jpg,"[[35, 33, 40, 42, 38, 58, 107, 141, 154, 135, ...",1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"
4,0-2,../OUTPUT/faces_detected/0-2/image_1055.jpg,image_1055.jpg,"[[21, 15, 17, 18, 17, 20, 17, 14, 16, 19, 23, ...",1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"
5,0-2,../OUTPUT/faces_detected/0-2/image_1135.jpg,image_1135.jpg,"[[26, 23, 27, 32, 64, 58, 71, 71, 81, 68, 93, ...",1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"
6,0-2,../OUTPUT/faces_detected/0-2/image_231.jpg,image_231.jpg,"[[63, 65, 63, 57, 65, 64, 69, 66, 67, 69, 73, ...",1,0,0,0,0,0,0,0,"[1, 0, 0, 0, 0, 0, 0, 0]"


# Export data_final as Pickle:

In [40]:
data_final.to_pickle("../OUTPUT/data_final.pkl")

All dones!