In [1]:
import cv2
import mediapipe as mp
import numpy as np
# Import required libraries to run the naive baseline
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
from collections import OrderedDict
import matplotlib.pyplot as plt
import pickle

import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision.models import mobilenet_v3_small

In [5]:
# Test with the mediapipe face mesh pretrained model
# download the mediapipe library   
# pip install mediapipe

# Initialize MediaPipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

In [2]:
# open df_train.pkl

image_dir = "Data/crops_100K"
df_train = pd.read_csv("Data/listes_training/data_100K/train_100K.csv", delimiter=' ')
df_test = pd.read_csv("Data/listes_training/data_100K/test_students.csv", delimiter=' ')

# drop the rows where FaceOcclusion is empty & add binary gender column
df_train = df_train.dropna(subset=['FaceOcclusion'])
df_train['gender_id'] = np.round(df_train['gender'] ).astype(int)

# reset the index
#df_train = df_train.reset_index(drop=True)
print(df_train.index)
df_train.head(-10)

Index([     0,      1,      2,      3,      4,      5,      6,      7,      8,
            9,
       ...
       101335, 101336, 101337, 101338, 101339, 101340, 101341, 101342, 101343,
       101344],
      dtype='int64', length=101341)


Unnamed: 0,filename,FaceOcclusion,gender,gender_id
0,database1/img00011271.jpg,0.019,0.999,1
1,database1/img00012471.jpg,0.035,1.000,1
2,database1/img00008127.jpg,0.127,0.001,0
3,database1/img00008972.jpg,0.014,0.999,1
4,database1/img00028187.jpg,0.346,0.982,1
...,...,...,...,...
101330,database3/database3/m.01drbr/59-FaceId-0_align...,0.000,1.000,1
101331,database3/database3/m.01drbr/69-FaceId-0_align...,0.021,0.998,1
101332,database3/database3/m.01drbr/7-FaceId-0_align.jpg,0.008,1.000,1
101333,database3/database3/m.01drbr/71-FaceId-0_align...,0.005,1.000,1


In [7]:
df_test = pd.read_csv('Data/listes_training/data_100K/test_students.csv')
df_test = df_test.dropna()
df_test.head(-5)

Unnamed: 0,filename
0,database2/database2/test/0.jpg
1,database2/database2/test/1.jpg
2,database2/database2/test/2.jpg
3,database2/database2/test/3.jpg
4,database2/database2/test/4.jpg
...,...
30497,database3/database3/m.01507p/8-FaceId-0_align.jpg
30498,database3/database3/m.01507p/80-FaceId-0_align...
30499,database3/database3/m.01507p/81-FaceId-0_align...
30500,database3/database3/m.01507p/82-FaceId-0_align...


In [8]:
########################################################
# PREPROCESSING DES IMAGES TEST ET DATAFRAME TEST
########################################################

test_preprocess = False # set to True to process all the images
img_dir = "Data/crops_100K"

if test_preprocess == True:
    #initialisation
    df_test['db_number'] = df_test['filename'].apply(lambda x: (x.split('/')[1])[-1])
    df_test['db_number'] = df_test['db_number'].astype(int)
    df_test['black_and_white']=0 # 0 = color 1 = black & white
    df_test['image_width'] = 0
    df_test['image_height'] = 0
    df_test['channels'] = 0
    df_test['pixels'] = 0
    df_test['face'] = 1   # 1 = 1 face detected, 0 = no face detected
    df_test['face_pixels'] = 0
    df_test['mask_pixels'] = 0
    df_test['count'] = 1
    test_no_face_indices = []

    for i in df_test.index:
        if i % 5000 == 0: print(i)

        # load image; convert and save information
        image = cv2.imread(f"{image_dir}/{df_test.loc[i]['filename']}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if is_black_and_white(image):
            df_test.loc[i, 'black_and_white'] = 1
        df_test.loc[i, 'image_width'] = image.shape[0]
        df_test.loc[i, 'image_height'] = image.shape[1]
        df_test.loc[i, 'pixels'] = image.shape[0] * image.shape[1]
        df_test.loc[i, 'channels'] = image.shape[2]

        # process image
        results = face_mesh.process(image)
        mask, skin_area = get_masked_image(image, results)
        contours = get_contours(image, results)
        mesh = get_mesh(image, results)
        cv2.imwrite('Data/Mediapipe/masked_images_test/' + str(i) + '_masked.jpg', cv2.cvtColor(skin_area, cv2.COLOR_RGB2BGR))
        cv2.imwrite('Data/Mediapipe/contoured_images_test/' + str(i) + '_mesh.jpg', cv2.cvtColor(contours, cv2.COLOR_RGB2BGR))
        cv2.imwrite('Data/Mediapipe/meshed_images_test/' + str(i) + '_contour.jpg', cv2.cvtColor(mesh, cv2.COLOR_RGB2BGR))
        
        # save mask information to df and landmarks (if any) to a file
        if results.multi_face_landmarks: #face detected

            # save mask information
            df_test.loc[i, 'face_pixels'] = mask[:,:,0].sum() / 255 # white pixels only ( value = 255) on 1 channel only
            df_test.loc[i, 'mask_pixels'] = df_test.loc[i, 'pixels'] - df_test.loc[i, 'face_pixels']

            # save landmarks to file
            for f, face_landmarks in enumerate(results.multi_face_landmarks):
                if f==0:
                    landmarks_list = face_landmarks
                    with open('Data/Mediapipe/keypoints_test/' + str(i) + '_landmarks.pkl', 'wb') as file:
                        pickle.dump(landmarks_list, file)
        else:
            df_test.loc[i, 'face'] = 0
            df_test.loc[i, 'face_pixels'] = df_test.loc[i, 'pixels']
            df_test.loc[i, 'mask_pixels'] = 0
            test_no_face_indices.append(i)

    df_test['color']= df_test['black_and_white'].apply(lambda x: 0 if x == True else 1)
    df_test['no_color']= 1 - df_test['color']
    # Save the dataframe and the no_face_indices
    with open('Data/Mediapipe/df_test_.pkl', 'wb') as f: pickle.dump(df_test, f)
    with open('Data/Mediapipe/test_no_face_indices.pkl', 'wb') as f: pickle.dump(test_no_face_indices, f)

else :
    print('Images already processed.\nSet test_preprocess to "True" to process all the images')   
    # Load the dataframe and the no_face_indices
    with open('Data/Mediapipe/df_test.pkl', 'rb') as f: df_test = pickle.load(f)
    with open('Data/Mediapipe/test_no_face_indices.pkl', 'rb') as f: no_face_indices_test = pickle.load(f)
    print("df_test and no_face_indices_test are loaded")  

Images already processed.
Set test_preprocess to "True" to process all the images


FileNotFoundError: [Errno 2] No such file or directory: 'Data/Mediapipe/df_test.pkl'

In [9]:
########################################################
# PREPROCESSING DES IMAGES TEST ET DATAFRAME TEST
# (circa 1 hour with GPU)
########################################################

train_preprocess = False # set to True to process all the images
img_dir = "Data/crops_100K"

if train_preprocess == True:

    #initialisation
    df_train['db_number'] = df_train['filename'].apply(lambda x: (x.split('/')[0])[-1])
    df_train['db_number'] = df_train['db_number'].astype(int)
    df_train['black_and_white']=0 # 0 = color 1 = black & white
    df_train['image_width'] = 0
    df_train['image_height'] = 0
    df_train['channels'] = 0
    df_train['pixels'] = 0
    df_train['face'] = 1   # 1 = 1 face detected, 0 = no face detected
    df_train['face_pixels'] = 0
    df_train['mask_pixels'] = 0
    df_train['count'] = 1
    train_no_face_indices = []

    for i in df_train.index:
        if i == 100: print(i)
        if i == 1000: print(i)
        if i % 5000 == 0: print(i)

        # load image; convert and save information
        image = cv2.imread(f"{image_dir}/{df_train.loc[i]['filename']}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if is_black_and_white(image):
            df_train.loc[i, 'black_and_white'] = 1
        df_train.loc[i, 'image_width'] = image.shape[0]
        df_train.loc[i, 'image_height'] = image.shape[1]
        df_train.loc[i, 'pixels'] = image.shape[0] * image.shape[1]
        df_train.loc[i, 'channels'] = image.shape[2]

        # process image
        results = face_mesh.process(image)
        mask, skin_area = get_masked_image(image, results)
        contours = get_contours(image, results)
        mesh = get_mesh(image, results)
        cv2.imwrite('Data/Mediapipe/masked_images/' + str(i) + '_masked.jpg', cv2.cvtColor(skin_area, cv2.COLOR_RGB2BGR))
        cv2.imwrite('Data/Mediapipe/contoured_images/' + str(i) + '_mesh.jpg', cv2.cvtColor(contours, cv2.COLOR_RGB2BGR))
        cv2.imwrite('Data/Mediapipe/meshed_images/' + str(i) + '_contour.jpg', cv2.cvtColor(mesh, cv2.COLOR_RGB2BGR))
        
        # save mask info in df and landmarks  (if any) to a file
        if results.multi_face_landmarks: #face detected

            # save mask information
            df_train.loc[i, 'face_pixels'] = mask[:,:,0].sum() / 255 # white pixels only ( value = 255) on 1 channel only
            df_train.loc[i, 'mask_pixels'] = df_train.loc[i, 'pixels'] - df_train.loc[i, 'face_pixels']

            # save landmarks to file
            for f, face_landmarks in enumerate(results.multi_face_landmarks):
                if f==0:
                    landmarks_list = face_landmarks
                    with open('Data/Mediapipe/keypoints/' + str(i) + '_landmarks.pkl', 'wb') as file:
                        pickle.dump(landmarks_list, file)

        else:
            df_train.loc[i, 'face'] = 0
            df_train.loc[i, 'face_pixels'] = df_train.loc[i, 'pixels']
            df_train.loc[i, 'mask_pixels'] = 0
            train_no_face_indices.append(i)

    df_train['color']= df_train['black_and_white'].apply(lambda x: 0 if x == True else 1)
    df_train['no_color']= 1 - df_train['color']

    # Save the dataframe and the no_face_indices
    with open('Data/Mediapipe/df_train_.pkl', 'wb') as f: pickle.dump(df_train, f)
    with open('Data/Mediapipe/train_no_face_indices_.pkl', 'wb') as f: pickle.dump(train_no_face_indices, f)

else:
    print("Set train_preprocess to True to process all the images")
    # Load the dataframe and the no_face_indices
    with open('Data/Mediapipe/df_train.pkl', 'rb') as f: df_train = pickle.load(f)
    with open('Data/Mediapipe/train_no_face_indices.pkl', 'rb') as f: no_face_indices_train = pickle.load(f)
    print("df_train and no_face_indices_train are loaded")  

df_train.head(-10)

Set train_preprocess to True to process all the images


FileNotFoundError: [Errno 2] No such file or directory: 'Data/Mediapipe/df_train.pkl'

In [3]:

df_train.describe()
# with open('Data/Mediapipe/df_train.pkl', 'wb') as f: pickle.dump(df_train, f)

Unnamed: 0,FaceOcclusion,gender,gender_id
count,101341.0,101341.0,101341.0
mean,0.087769,0.599738,0.60108
std,0.09053,0.48611,0.489679
min,0.0,0.0,0.0
25%,0.019,0.001,0.0
50%,0.054,0.996,1.0
75%,0.131,0.999,1.0
max,1.0,1.0,1.0


In [10]:
stats_train = df_train.groupby('db_number')['count','color','no_color'].sum()
stats_train.loc['total'] = stats_train.sum()
# add column with percentage of no color images
stats_train['no_color_%'] = stats_train['no_color'] / stats_train['count'] * 100
stats_train

KeyError: 'db_number'