# Import libraries

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import sys
import cv2

In [2]:
df = pd.read_csv("data.csv")

In [3]:
print(df.shape)
df.head()

(17570, 13)


Unnamed: 0,user_id,original_image,face_id,age,check,gender,x,y,dx,dy,tilt_ang,fiducial_yaw_angle,fiducial_score
0,30601258@N03,10424815813_e94629b1ec_o.jpg,2,"(25, 32)",1,m,301,105,640,641,0,0,94
1,30601258@N03,10437979845_5985be4b26_o.jpg,3,"(25, 32)",1,m,752,1255,484,485,180,0,47
2,30601258@N03,11816644924_075c3d8d59_o.jpg,2,"(25, 32)",1,m,175,80,769,768,-75,0,34
3,30601258@N03,10424595844_1009c687e4_o.jpg,4,"(38, 43)",1,f,1912,905,1224,1224,155,0,64
4,30601258@N03,9506931745_796300ca4a_o.jpg,5,"(25, 32)",1,f,1069,581,1575,1575,0,30,131


In [4]:
df['face_id'] = df['face_id'].apply(str)

In [5]:
df.dtypes

user_id               object
original_image        object
face_id               object
age                   object
check                  int64
gender                object
x                      int64
y                      int64
dx                     int64
dy                     int64
tilt_ang               int64
fiducial_yaw_angle     int64
fiducial_score         int64
dtype: object

Convert face_id column to string, so we can create the path to the image below:

In [6]:
df['path'] = 'faces/' + df['user_id'] + '/coarse_tilt_aligned_face.' + df['face_id'] + '.' + df['original_image']

In [7]:
df.head()

Unnamed: 0,user_id,original_image,face_id,age,check,gender,x,y,dx,dy,tilt_ang,fiducial_yaw_angle,fiducial_score,path
0,30601258@N03,10424815813_e94629b1ec_o.jpg,2,"(25, 32)",1,m,301,105,640,641,0,0,94,faces/30601258@N03/coarse_tilt_aligned_face.2....
1,30601258@N03,10437979845_5985be4b26_o.jpg,3,"(25, 32)",1,m,752,1255,484,485,180,0,47,faces/30601258@N03/coarse_tilt_aligned_face.3....
2,30601258@N03,11816644924_075c3d8d59_o.jpg,2,"(25, 32)",1,m,175,80,769,768,-75,0,34,faces/30601258@N03/coarse_tilt_aligned_face.2....
3,30601258@N03,10424595844_1009c687e4_o.jpg,4,"(38, 43)",1,f,1912,905,1224,1224,155,0,64,faces/30601258@N03/coarse_tilt_aligned_face.4....
4,30601258@N03,9506931745_796300ca4a_o.jpg,5,"(25, 32)",1,f,1069,581,1575,1575,0,30,131,faces/30601258@N03/coarse_tilt_aligned_face.5....


Dataframe with all images from the folder Face and their information.

# Adding size column in the dataframe:

First, get the size of each image by the following functions:

In [8]:
def getInfo(df):
    user = list(df['user_id'])
    face = list(df['face_id'])
    image = list(df['original_image'])
    info = zip(user, face, image)
    info_set = set(info)
    return info_set

In [9]:
images = getInfo(df)

In [10]:
def getImageList(images):
    lst_im = []
    for u,f,i in images:
        a = (f"faces/{u}/coarse_tilt_aligned_face.{f}.{i}")
        lst_im.append(a)
    return lst_im

In [11]:
lst_total = getImageList(images)

In [12]:
def getSize(lst_total):
    lst_size = []
    for imag in lst_total:
        z = cv2.imread(imag)
        size = z.shape
        lst_size.append((imag, size))
    return lst_size

In [13]:
lst_size = getSize(lst_total)

Now, create a dataframe with the lst_size:

In [14]:
df_size = pd.DataFrame(lst_size,columns=['path','size'])

In [15]:
print(df_size.shape)
df_size.head()

(17462, 2)


Unnamed: 0,path,size
0,faces/31885615@N05/coarse_tilt_aligned_face.16...,"(600, 600, 3)"
1,faces/10001312@N04/coarse_tilt_aligned_face.61...,"(599, 599, 3)"
2,faces/63164355@N03/coarse_tilt_aligned_face.10...,"(395, 395, 3)"
3,faces/20632896@N03/coarse_tilt_aligned_face.55...,"(600, 601, 3)"
4,faces/11008464@N06/coarse_tilt_aligned_face.97...,"(768, 593, 3)"


Let's merge both dataframes. This way we will add a column size in df:

In [16]:
df_final = pd.merge(df, df_size, on='path')

Finally, let's remove the columns we won't need for this project:

In [17]:
df_final = df_final.drop(columns=['check', 'gender', 'x', 'y', 'dx', 'dy', 'tilt_ang', 'fiducial_yaw_angle', 'fiducial_score'])

In [18]:
print(df_final.shape)
df_final.head()

(17570, 6)


Unnamed: 0,user_id,original_image,face_id,age,path,size
0,30601258@N03,10424815813_e94629b1ec_o.jpg,2,"(25, 32)",faces/30601258@N03/coarse_tilt_aligned_face.2....,"(601, 600, 3)"
1,30601258@N03,10437979845_5985be4b26_o.jpg,3,"(25, 32)",faces/30601258@N03/coarse_tilt_aligned_face.3....,"(485, 484, 3)"
2,30601258@N03,11816644924_075c3d8d59_o.jpg,2,"(25, 32)",faces/30601258@N03/coarse_tilt_aligned_face.2....,"(601, 600, 3)"
3,30601258@N03,10424595844_1009c687e4_o.jpg,4,"(38, 43)",faces/30601258@N03/coarse_tilt_aligned_face.4....,"(600, 600, 3)"
4,30601258@N03,9506931745_796300ca4a_o.jpg,5,"(25, 32)",faces/30601258@N03/coarse_tilt_aligned_face.5....,"(600, 600, 3)"


In [61]:
df_final['size'].value_counts()

(600, 600, 3)    4228
(600, 601, 3)     729
(601, 600, 3)     694
(242, 242, 3)     388
(306, 306, 3)     307
                 ... 
(381, 381, 3)       1
(664, 510, 3)       1
(240, 183, 3)       1
(484, 600, 3)       1
(726, 600, 3)       1
Name: size, Length: 1555, dtype: int64

The images have different sizes, we need to put all into one size, but this we will do later.

First, let's save it to csv and separate by the following clases:

In [63]:
df_final.to_csv("df_final.csv", index = False)

Saved as csv.

# Age 0-2:

In [20]:
data0_2 = df[df.age == '(0, 2)']

In [21]:
images = getInfo(data0_2)

In [22]:
lst_images = getImageList(images)

In [23]:
len(lst_images)

2491

Saving the images into the correct folder:

In [24]:
def SaveImages(lst_images, folder):
    try:
        for i, pic in enumerate(lst_images):
            img = Image.open(pic)
            img.save(f"images/{folder}/image_{i}.jpg")
    except:
        pass
    return f"All images were saved in the folder {folder}"    

In [25]:
SaveImages(lst_images, '0_2')

'All images were saved in the folder 0_2'

# Age 4-6:

In [26]:
data4_6 = df[df.age == '(4, 6)']

In [27]:
images = getInfo(data4_6)

In [28]:
lst_images = getImageList(images)

In [29]:
len(lst_images)

2140

In [30]:
SaveImages(lst_images, '4_6')

'All images were saved in the folder 4_6'

# Age 8-12:

In [31]:
data8_12 = df[df.age == '(8, 12)']

In [32]:
images = getInfo(data8_12)

In [33]:
lst_images = getImageList(images)

In [34]:
len(lst_images)

2124

In [35]:
SaveImages(lst_images, '8_12')

'All images were saved in the folder 8_12'

# Age 15-20:

In [36]:
data15_20 = df[df.age == '(15, 20)']

In [37]:
images = getInfo(data15_20)

In [38]:
lst_images = getImageList(images)

In [39]:
len(lst_images)

1642

In [40]:
SaveImages(lst_images, '15_20')

'All images were saved in the folder 15_20'

# Age 25-32:

In [41]:
data25_32 = df[df.age == '(25, 32)']

In [42]:
images = getInfo(data25_32)

In [43]:
lst_images = getImageList(images)

In [44]:
len(lst_images)

5023

In [45]:
SaveImages(lst_images, '25_32')

'All images were saved in the folder 25_32'

# Age 38-43:

In [46]:
data38_43 = df[df.age == '(38, 43)']

In [47]:
images = getInfo(data38_43)

In [48]:
lst_images = getImageList(images)

In [49]:
len(lst_images)

2340

In [50]:
SaveImages(lst_images, '38_43')

'All images were saved in the folder 38_43'

# Age 48-53:

In [51]:
data48_53 = df[df.age == '(48, 53)']

In [52]:
images = getInfo(data48_53)

In [53]:
lst_images = getImageList(images)

In [54]:
len(lst_images)

830

In [55]:
SaveImages(lst_images, '48_53')

'All images were saved in the folder 48_53'

# Age 60-100:

In [56]:
data60_100 = df[df.age == '(60, 100)']

In [57]:
images = getInfo(data60_100)

In [58]:
lst_images = getImageList(images)

In [59]:
len(lst_images)

872

In [60]:
SaveImages(lst_images, '60_100')

'All images were saved in the folder 60_100'

All the images were saved on their respective folders.