## Face Detection

In [1]:
import os
import cv2
import math
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
import numpy as np
 
# Detect face
def face_detection(img):
    """Crop ``img`` to the first detected face, falling back to ``img`` itself.

    Relies on the module-level ``face_detector`` and ``eye_detector`` cascade
    classifiers. Only the first face returned by the face detector is used.

    Args:
        img: Image array as accepted by ``detectMultiScale`` (the callers in
            this file pass the result of ``cv2.imread``).

    Returns:
        The cropped face region when a face is found and at least one eye is
        detected inside it; otherwise the unmodified ``img``.
    """
    detected_faces = face_detector.detectMultiScale(img, 1.1, 4)
    if len(detected_faces) == 0:
        # No face at all: hand the input back untouched.
        return img

    left, top, width, height = detected_faces[0]
    crop = img[int(top):int(top + height), int(left):int(left + width)]

    # A candidate face without any detectable eye is treated as a false
    # positive, so the full image is kept instead of the crop.
    detected_eyes = eye_detector.detectMultiScale(crop, scaleFactor=1.2, minNeighbors=4)
    if len(detected_eyes) == 0:
        return img
    return crop
 

# Locate the Haar cascade XML files inside the installed cv2 package:
# they are looked up in the "data" folder next to cv2's own module file.
opencv_home = cv2.__file__
path = os.path.dirname(opencv_home)
path_for_face = os.path.join(path, "data", "haarcascade_frontalface_default.xml")
path_for_eyes = os.path.join(path, "data", "haarcascade_eye.xml")

face_detector = cv2.CascadeClassifier(path_for_face)
eye_detector = cv2.CascadeClassifier(path_for_eyes)

# CascadeClassifier does not raise when the XML file cannot be loaded; it
# just stays empty and only fails later inside detectMultiScale. Fail fast
# here so a wrong path is reported with the offending file name.
for cascade_path, detector in ((path_for_face, face_detector),
                               (path_for_eyes, eye_detector)):
    if detector.empty():
        raise IOError("Failed to load Haar cascade: " + cascade_path)


## Create UAM Dataset 

In [2]:
UTKFACE_PATH = '../Datasets/UTKFace/UTKFace/UTKface/'
AGEDB_PATH = '../Datasets/AgeDB/AgeDB/'
MEGAAGE_PATH = '../Datasets/megaage_asian/megaage_asian/train/'
UAM_PATH = '../Datasets/UAM/images/'

# os.listdir returns entries in arbitrary order; sorting makes the index
# assigned to every image reproducible between runs.
utkface_content = sorted(os.listdir(UTKFACE_PATH))
agedb_content = sorted(os.listdir(AGEDB_PATH))

# splitlines() keeps the final entry even when the list file does not end
# with a newline (split('\n')[:-1] would silently drop it).
with open('../Datasets/megaage_asian/megaage_asian/list/train_name.txt') as f:
    megaage_content = f.read().splitlines()

with open('../Datasets/megaage_asian/megaage_asian/list/train_age.txt') as f2:
    megaage_content_age = f2.read().splitlines()

# The two list files are paired line by line; a length mismatch would
# attach ages to the wrong images.
if len(megaage_content) != len(megaage_content_age):
    raise ValueError('train_name.txt and train_age.txt have different lengths: '
                     + str(len(megaage_content)) + ' vs ' + str(len(megaage_content_age)))

# cv2.imwrite does not create missing directories.
if not os.path.isdir(UAM_PATH):
    os.makedirs(UAM_PATH)


def _add_to_uam(src_path, true_age, index, crop_face):
    """Copy one source image into UAM_PATH and return the next free index.

    Args:
        src_path: Path of the source image file.
        true_age: Age label of the image; only ages in [0, 100] are kept.
        index: Running 1-based image number used in the output file name.
        crop_face: When true, the image is passed through ``face_detection``
            before being written.

    Returns:
        ``index + 1`` when an image was written, otherwise ``index`` unchanged.

    Raises:
        IOError: If the output image could not be written.
    """
    if not (0 <= true_age < 101):
        return index

    img = cv2.imread(src_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        print('Skipping unreadable image: ' + src_path)
        return index

    if crop_face:
        img = face_detection(img)

    # Output name: <6-digit index>_<age>_.jpg. The trailing '_' before the
    # extension is kept on purpose: the label-generation step reads the age
    # as the second '_'-separated field of the file name.
    target = UAM_PATH + str(index).zfill(6) + '_' + str(true_age) + '_' + '.jpg'
    if not cv2.imwrite(target, img):
        raise IOError('Could not write image: ' + target)
    return index + 1


index = 1

# UTKFace: the age is the first '_'-separated field; images are copied as-is.
for addr in utkface_content:
    true_age = int(addr.split('_')[0])
    index = _add_to_uam(UTKFACE_PATH + addr, true_age, index, crop_face=False)

# AgeDB: the age is the second '_'-separated field minus its 4-character suffix.
for addr in agedb_content:
    true_age = int(addr.split('_')[1][:-4])
    index = _add_to_uam(AGEDB_PATH + addr, true_age, index, crop_face=True)

# MegaAge-Asian: names and ages come from the two parallel list files.
for image_file, age_text in zip(megaage_content, megaage_content_age):
    true_age = int(age_text)
    index = _add_to_uam(MEGAAGE_PATH + image_file, true_age, index, crop_face=True)


## Generate Label Distributions

In [3]:
import numpy as np
from scipy.special import softmax
import os

# Directory of UAM images; the age label is read from the second
# '_'-separated field of each file name in the loop further down.
UAM_PATH = '../Datasets/UAM/images/'

def normal_dist(x , mu , std):
    """Evaluate a scaled Gaussian bump centred at ``mu`` with width ``std``.

    Computes ``(pi * std) * exp(-0.5 * ((x - mu) / std) ** 2)``; ``x`` may be
    a scalar or a NumPy array (evaluated element-wise).

    NOTE(review): the prefactor is ``pi * std``, not the ``1 / (std *
    sqrt(2 * pi))`` of a normalised normal density, so the result does not
    integrate to 1. The caller in this file feeds the values to ``softmax``,
    which is sensitive to this scale - confirm before changing it.
    """
    z_score = (x - mu) / std
    scale = np.pi * std
    return scale * np.exp(-0.5 * z_score ** 2)

uam_N = 101 # number of ages in the dataset
sigma = 5   # width passed to normal_dist for every image

# One text row of uam_N space-separated values is produced per image.
# NOTE(review): rows follow os.listdir order and carry no image name, so any
# reader has to list UAM_PATH in the same order - confirm against the consumer.
# NOTE(review): the curve is evaluated at uam_N sorted points drawn uniformly
# at random from [0, uam_N), not at the integer ages, and no seed is set, so
# the file differs from run to run - confirm this is intended.
rows = []
for image_name in os.listdir(UAM_PATH):
    # The age label is the second '_'-separated field of the file name.
    true_age = int(image_name.split('_')[1])
    mu = true_age
    generated_numbers = np.random.uniform(0, uam_N , uam_N)
    generated_numbers.sort()
    y = normal_dist(generated_numbers, mu, sigma)
    y = softmax(y)
    rows.append(' '.join(map(str, y)))

# Build the output once instead of growing a string inside the loop.
to_file = ''.join(row + '\n' for row in rows)

with open("../Datasets/UAM/dist/UAM_dist.txt", "w") as f:
    f.write(to_file)