# Mated and non-mated children and adults 

This notebook builds the per-image info dataframes (file path, identity, ethnicity, estimated age, OFIQ quality scores) for mated and non-mated adults and children.



In [2]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import os
import json
import numpy as np
import os
import shutil

## Functions

In [30]:

def list_files_from_subfolders(root_dir):
    """
    Collect the files that sit two levels below `root_dir` (root_dir/<item>/<subdir>/<file>),
    grouped by how many subdirectories their <item> folder has.

    Parameters:
        root_dir (str): Directory whose immediate child folders are inspected.

    Returns:
        tuple: A tuple containing two lists of paths relative to `root_dir` (item/subdir/file):
               - `all_files`: files from item folders that contain more than one subdirectory,
               - `single_files`: files from item folders that contain exactly one subdirectory.
               Both lists are empty when `root_dir` is not a directory.
    """
    all_files = []     # files below items with several subdirectories
    single_files = []  # files below items with exactly one subdirectory

    if not os.path.isdir(root_dir):
        print(f"The path {root_dir} is not a valid directory.")
        # Return the same two-list shape as the normal path so callers can
        # always unpack or index the result.
        return all_files, single_files

    for item in os.listdir(root_dir):
        item_path = os.path.join(root_dir, item)
        if not os.path.isdir(item_path):
            continue

        subdirs = [d for d in os.listdir(item_path) if os.path.isdir(os.path.join(item_path, d))]
        if not subdirs:
            continue  # items without subdirectories contribute nothing

        # Same collection logic for both cases; only the destination list differs.
        destination = all_files if len(subdirs) > 1 else single_files
        for subdir in subdirs:
            subdir_path = os.path.join(item_path, subdir)
            destination.extend(
                os.path.join(item, subdir, f)
                for f in os.listdir(subdir_path)
                if os.path.isfile(os.path.join(subdir_path, f))
            )

    return all_files, single_files



def list_files_from_folders_with_multiple_files_child(root_dir):
    """
    Split the files found one level below `root_dir` (root_dir/<folder>/<file>) by
    how many files their folder holds.

    Parameters:
        root_dir (str): Directory whose immediate child folders are inspected.

    Returns:
        tuple: A tuple containing two lists of "<folder>/<file>" strings:
               - entries from folders that contain more than one file,
               - entries from folders that contain exactly one file.
               Folders without files and non-directory items are ignored.
    """
    multi_file_entries = []
    single_file_entries = []

    for folder_name in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue

        file_names = [
            entry for entry in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, entry))
        ]
        if not file_names:
            continue

        # More than one file -> first list, exactly one file -> second list.
        bucket = multi_file_entries if len(file_names) > 1 else single_file_entries
        bucket.extend(f"{folder_name}/{entry}" for entry in file_names)

    return multi_file_entries, single_file_entries





def list_files_from_subfolders_RFW_full(root_dir, subdir_name='data_adults_full_1'):
    """
    List the files found in the identity folders of `root_dir/subdir_name`, split by
    how many files each identity folder contains.

    Parameters:
        root_dir (str): Directory that contains the dataset folder `subdir_name`.
        subdir_name (str): Name of the dataset folder inside `root_dir` to scan.
            Defaults to 'data_adults_full_1', the folder this function was
            previously hard-wired to.

    Returns:
        tuple: A tuple containing two lists of paths relative to the dataset folder
               ("<identity folder>/<file>"):
               - `all_files` with files from folders that contain more than one file,
               - `single_files` with files from folders that contain exactly one file.
               Both lists are empty when the dataset folder does not exist.
    """
    all_files = []  # files from identity folders with multiple files
    single_files = []  # files from identity folders with exactly one file

    adults_full_path = os.path.join(root_dir, subdir_name)
    if not os.path.isdir(adults_full_path):
        print(f"The path {adults_full_path} is not a valid directory.")
        return all_files, single_files

    for item in os.listdir(adults_full_path):
        item_path = os.path.join(adults_full_path, item)
        # Only folders are considered; loose files in the dataset folder are ignored
        if not os.path.isdir(item_path):
            continue

        files = [f for f in os.listdir(item_path) if os.path.isfile(os.path.join(item_path, f))]
        if len(files) > 1:
            all_files.extend(os.path.join(item, f) for f in files)
        elif len(files) == 1:
            single_files.append(os.path.join(item, files[0]))

    return all_files, single_files


In [4]:


def move_and_rename_data_folders(base_path):
    """
    Flatten the per-ethnicity folder layout below `base_path`.

    Every subfolder `base_path/<race>/<folder>` is moved to
    `base_path/<race>_<folder>` for the races African, Caucasian, Asian and
    Indian. Plain files inside a race folder are left where they are, and the
    (then emptied) race folders themselves are not removed.

    Parameters:
        base_path (str): Directory that contains the race folders.
    """
    for race in ('African', 'Caucasian', 'Asian', 'Indian'):
        race_path = os.path.join(base_path, race)

        if not os.path.exists(race_path):
            print(f"No folder found for {race}")
            continue

        # Snapshot the subfolders before moving anything
        identity_folders = [
            name for name in os.listdir(race_path)
            if os.path.isdir(os.path.join(race_path, name))
        ]

        for folder in identity_folders:
            old_path = os.path.join(race_path, folder)
            new_path = os.path.join(base_path, f"{race}_{folder}")

            # Prefix the folder name with its race and lift it one level up
            shutil.move(old_path, new_path)
            print(f"Moved and renamed {old_path} to {new_path}")

# base_directory = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data_adults_full_1'
# move_and_rename_data_folders(base_directory)


In [5]:
def extract_unique_identifiers(file_list):
    """
    Reduce each file path to its bare file name (no directory, no extension).

    The result has one entry per input path, in input order; duplicates are
    not removed.

    Parameters:
        file_list (list): A list of strings containing file paths.

    Returns:
        list: The file name stem of every path in `file_list`.
    """
    return [os.path.splitext(os.path.basename(path))[0] for path in file_list]


def drop_after_zeros(s):
    """
    Cut a string at the underscore that precedes its first '000' sequence.

    The search uses the FIRST '000' anywhere in the string, so a string that
    contains '000' earlier than its numeric suffix is cut at that earlier position.

    Parameters:
        s (str): The input string.

    Returns:
        str: - `s` unchanged when it contains no '000',
             - otherwise `s` up to (not including) the last underscore before the first '000',
             - or `s` up to the first '000' when no underscore precedes it.
    """
    first_zeros = s.find('000')
    if first_zeros == -1:
        return s

    # Last underscore strictly before the zeros; -1 when there is none
    cut = s.rfind('_', 0, first_zeros)
    return s[:first_zeros] if cut == -1 else s[:cut]


def load_data(path):
    """
    Load a JSON file of per-image age estimates into a DataFrame.

    The JSON is expected to be a mapping whose keys become the DataFrame index
    and whose values become the 'Age' column.

    Parameters:
        path (str): Path to the JSON file.

    Returns:
        tuple: (df, nans, describe, value_counts) where
               - `df` has the columns 'Age' and 'Identity', with rows lacking an
                 age removed and the remaining rows sorted by 'Age';
                 'Identity' is the index label with its last '_<suffix>' part removed,
               - `nans` is the number of missing ages that were dropped,
               - `describe` is `df.describe()` taken after dropping missing ages,
               - `value_counts` is `df.value_counts()` taken after dropping missing ages.
    """
    # Context manager closes the file even when the JSON is malformed
    with open(path, encoding='utf-8') as f:
        data = json.load(f)
    df = pd.DataFrame.from_dict(data, orient='index', columns=['Age'])

    # removing nans
    nans = df['Age'].isna().sum()
    df = df.dropna()

    # Summary statistics are taken before the 'Identity' column is added
    describe = df.describe()
    value_counts = df.value_counts()

    # Identity = index label without its last underscore-separated part.
    # `n` is passed by keyword: newer pandas versions reject it as a positional argument.
    df['Identity'] = df.index.str.rsplit('_', n=1).str[0]

    df = df.sort_values(by='Age')

    return df, nans, describe, value_counts


def drop_number_after_last_underscore(df, column_name):
    """
    Strip the last underscore and everything after it from every string in a column.

    The column is overwritten in place on `df`. Values without any underscore
    become the empty string.

    Parameters:
        df (pd.DataFrame): The DataFrame containing the column to process.
        column_name (str): The name of the column to process.

    Returns:
        pd.DataFrame: The same DataFrame object, with the column modified.

    Raises:
        ValueError: If `column_name` is not a column of `df`.
    """
    if column_name not in df.columns:
        raise ValueError(f"The column {column_name} does not exist in the DataFrame.")

    # rpartition yields the part before the last '_' ('' when there is no '_')
    df[column_name] = df[column_name].apply(lambda value: value.rpartition('_')[0])
    return df


## Loading the ages

In [6]:
# Load the per-image age estimates. load_data returns a 4-tuple; only its
# first element (the cleaned DataFrame with 'Age' and 'Identity') is kept.
df_YLFW = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/YLFW_output_data_precroped_all_age.json')[0]
df_RFW_african = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/RFW_age_estimations/output_african.json')[0]
df_RFW_asian = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/RFW_age_estimations/output_asian.json')[0]
df_RFW_caucasian = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/RFW_age_estimations/output_caucasian.json')[0]
df_RFW_indian = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/RFW_age_estimations/output_indian.json')[0]
# Stack the four RFW ethnicity files into one frame, cast the ages to int
# (only done for RFW, not for YLFW) and re-sort by age across the combined frame.
df_RFW = pd.concat([df_RFW_african, df_RFW_asian, df_RFW_caucasian, df_RFW_indian])
df_RFW.Age = df_RFW.Age.astype(int)
df_RFW = df_RFW.sort_values(by='Age', ascending=True)

# Combined age table for both sources. The index (the JSON keys) is copied into
# an 'image_name' column so it can serve as the merge key in the cells below.
age_df = pd.concat([df_YLFW,df_RFW])
age_df['image_name'] = age_df.index

# Mated adults

In [43]:

# Directory that contains the 'data_adults_full_1' folder scanned by list_files_from_subfolders_RFW_full.
adults_dir = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/'
# Element [0] = files from identity folders that hold more than one image (the mated set).
files_list = list_files_from_subfolders_RFW_full(adults_dir)[0]
# File names without directory and extension, in the same order as files_list.
image_names = extract_unique_identifiers(files_list)


In [9]:
len(files_list)

40607

## Finding the images in the full RFW folder that are not yet covered by the existing OFIQ results

In [22]:
def extract_substrings(original_list):
    """
    Extract, from each path, the text between its first '/' and its first '.jpg'.

    Entries that contain no '/' are skipped, as are entries in which '.jpg' is
    missing or sits at the very start of the string.

    Parameters:
        original_list (list): Strings such as "<folder>/<image>.jpg".

    Returns:
        list: The extracted substrings, in input order, for the entries that were not skipped.
    """
    extracted = []
    for entry in original_list:
        begin = entry.find('/') + 1  # 0 when there is no slash
        stop = entry.find('.jpg')    # -1 when there is no '.jpg'
        if begin <= 0 or stop <= 0:
            continue
        extracted.append(entry[begin:stop])
    return extracted

# original_list = files_list
# new_list = extract_substrings(original_list)
# print(new_list)

['m.010lz5_0001', 'm.010lz5_0002', 'm.010lz5_0003', 'm.011y5k_0001', 'm.011y5k_0002', 'm.011y5k_0003', 'm.011y5k_0004', 'm.01257s_0001', 'm.01257s_0002', 'm.01257s_0003', 'm.0126y2_0001', 'm.0126y2_0002', 'm.0126y2_0003', 'm.0126y2_0004', 'm.012f1d_0001', 'm.012f1d_0002', 'm.012f1d_0003', 'm.012gl8_0001', 'm.012gl8_0002', 'm.012gl8_0003', 'm.012gr9_0001', 'm.012gr9_0002', 'm.012mh__0001', 'm.012mh__0002', 'm.012mh__0003', 'm.012mh__0004', 'm.012mh__0005', 'm.012mh__0006', 'm.012mk7_0001', 'm.012mk7_0002', 'm.012mk7_0003', 'm.012mmb_0001', 'm.012mmb_0002', 'm.012mmb_0003', 'm.012qsp_0001', 'm.012qsp_0002', 'm.012qsp_0003', 'm.012qsp_0004', 'm.012tmz_0001', 'm.012tmz_0002', 'm.012tmz_0003', 'm.012tmz_0004', 'm.012vd6_0001', 'm.012vd6_0002', 'm.012vd6_0003', 'm.012vk4_0001', 'm.012vk4_0002', 'm.012vk4_0003', 'm.0139ps_0001', 'm.0139ps_0002', 'm.0139ps_0003', 'm.0139ps_0004', 'm.013w7j_0001', 'm.013w7j_0002', 'm.013w7j_0003', 'm.015m2v_0001', 'm.015m2v_0002', 'm.015pz3_0001', 'm.015pz3_000

In [10]:
# OFIQ_a = pd.read_csv('../../data/OFIQ_results/table_adults_all_in_one.csv', sep=',')
# OFIQ_a

Unnamed: 0,Filename,UnifiedQualityScore,BackgroundUniformity,IlluminationUniformity,LuminanceMean,LuminanceVariance,UnderExposurePrevention,OverExposurePrevention,DynamicRange,Sharpness,...,DownwardCropOfTheFaceImage.scalar,UpwardCropOfTheFaceImage.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar,ExpressionNeutrality.scalar,NoHeadCoverings.scalar,Unnamed: 57,image_name,identity_name
0,../../../data/tests/images/adults_full_final/m...,22.843954,71.516904,0.499635,0.337319,0.047136,0.198074,0.000000,7.576965,50,...,58,23,99,99,100,69,0,,m.010g87_0002,m.010g87
1,../../../data/tests/images/adults_full_final/m...,21.374777,37.258586,0.584877,0.221608,0.016571,0.140255,0.000000,6.845002,-11,...,9,1,100,100,100,36,100,,m.010lz5_0001,m.010lz5
2,../../../data/tests/images/adults_full_final/m...,21.944078,31.729164,0.332908,0.114954,0.014525,0.558846,0.000000,6.167718,-31,...,23,7,76,81,97,34,100,,m.010lz5_0002,m.010lz5
3,../../../data/tests/images/adults_full_final/m...,21.284401,97.060087,0.685185,0.162128,0.020428,0.432749,0.000000,6.572763,-26,...,94,95,99,76,100,49,100,,m.010lz5_0003,m.010lz5
4,../../../data/tests/images/adults_full_final/m...,18.683163,64.572584,0.532599,0.343895,0.013737,0.008838,0.000000,6.679960,-14,...,12,2,100,99,100,51,100,,m.0112yl_0001,m.0112yl
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36437,../../../data/tests/images/adults_full_final/m...,22.174162,198.838779,0.625390,0.302620,0.035789,0.109209,0.000000,7.330758,-24,...,6,1,91,99,100,12,100,,m.0zwh1k1_0003,m.0zwh1k1
36438,../../../data/tests/images/adults_full_final/m...,24.823811,168.138308,0.522373,0.293775,0.009479,0.033023,0.000000,6.655987,-41,...,11,2,89,97,100,13,100,,m.0zwh1k1_0004,m.0zwh1k1
36439,../../../data/tests/images/adults_full_final/m...,18.592001,66.181583,0.542700,0.705963,0.052286,0.001681,0.153326,6.642255,-34,...,16,3,90,100,100,57,100,,m.0_4pw_0002,m.0_4pw
36440,../../../data/tests/images/adults_full_final/m...,20.047659,34.442842,0.392734,0.258039,0.041665,0.294725,0.000000,7.303942,-9,...,47,15,89,99,100,68,100,,m.0_4pw_0003,m.0_4pw


In [None]:
def copy_images(image_folder, target_folder, new_list, original_list, df):
    """
    Copy the images named in `new_list` that are NOT yet listed in `df['image_name']`
    from `image_folder` into `target_folder`.

    Parameters:
        image_folder (str): Directory that the paths in `original_list` are relative to.
        target_folder (str): Destination directory; created if it does not exist.
            Images are copied flat into it (only the file name is kept).
        new_list (list): Image names (file name without extension) to consider.
        original_list (list): Relative image paths used to locate each image name.
        df: Object whose 'image_name' entry lists the image names to skip
            (e.g. a DataFrame with an 'image_name' column).

    A message is printed for every copied image, for every image whose source
    file does not exist, and for every image name without a path in `original_list`.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(target_folder, exist_ok=True)

    # Set for O(1) membership tests
    df_image_set = set(df['image_name'])

    # Map "file name without extension" -> relative path. The map is only ever
    # queried with exact image names, so it is built in one linear pass instead
    # of substring-matching every path against every entry of new_list.
    # When several paths share a name, the last one wins.
    image_path_map = {
        os.path.splitext(os.path.basename(full_path))[0]: full_path
        for full_path in original_list
    }

    for image_name in new_list:
        if image_name in df_image_set:
            continue  # already present in df, nothing to copy

        if image_name not in image_path_map:
            print(f"No source path found for {image_name}")
            continue

        full_path = image_path_map[image_name]
        source_image_path = os.path.join(image_folder, full_path)
        target_image_path = os.path.join(target_folder, os.path.basename(full_path))

        if os.path.exists(source_image_path):
            shutil.copy(source_image_path, target_image_path)
            print(f"Copied {full_path} to {target_folder}")
        else:
            print(f"Image {full_path} not found in {image_folder}")

# image_folder = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data_adults_full_1'
# target_folder = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_from_RFW_to_OFIQ'
# new_list = new_list
# df= OFIQ_a
# original_list = original_list

# copy_images(image_folder, target_folder, new_list, original_list, df)


Copied African_m.012mh_/m.012mh__0001.jpg to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_from_RFW_to_OFIQ
Copied African_m.012mh_/m.012mh__0005.jpg to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_from_RFW_to_OFIQ
Copied African_m.015pz3/m.015pz3_0003.jpg to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_from_RFW_to_OFIQ
Copied African_m.015q3m/m.015q3m_0004.jpg to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_from_RFW_to_OFIQ
Copied African_m.017729/m.017729_0001.jpg to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_from_RFW_to_OFIQ
Copied African_m.0183qt/m.0183qt_0006.jpg to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_from_RFW_to_OFIQ
Copied African_m.01c_3f/m.01c_3f_0002.jpg to /

### Before moving on: OFIQ needs to be run on the samples that are not already in table_adults_all_in_one.csv

### Move all files which are not mentioned in table_adults_all_in_one.csv


In [44]:

# One identity per image: cut each image name at the underscore before its "000N" counter.
identity_names = [drop_after_zeros(name) for name in image_names]

# Base table for the mated adults. files_list entries look like
# "<Ethnicity>_<identity>/<image>.jpg", so the text before the first '_' is the ethnicity.
DF = pd.DataFrame(
    {'files_list': files_list,
     'image_name': image_names,
     'identity_name': identity_names,
     'enrolled' : 'enrolled'})
DF['ethnicity'] = DF['files_list'].apply(lambda x: x.split('_')[0] if isinstance(x, str) and x.split() else None)

# OFIQ quality results: the original adults table, the table for the images that
# were missing from it (note the ';' separator), and the children table.
OFIQ_a_lille = pd.read_csv('../../data/OFIQ_results/table_adults_all_in_one.csv', sep=',')
OFIQ_c = pd.read_csv('../../data/OFIQ_results/OFIQ_children_all.csv', sep=',')
OFIQ_a_missing = pd.read_csv('../../data/OFIQ_results/table_adults_missing.csv', sep=';')
OFIQ_a = pd.concat([OFIQ_a_missing, OFIQ_a_lille])


def _image_name_from_filename(filename):
    """Return the file name of an OFIQ 'Filename' path without directory and extension."""
    # splitext removes only the final extension, so names that themselves contain
    # dots (e.g. "m.010lz5_0001.jpg") are kept intact; splitting on the first '.'
    # would reduce them to "m", and splitting on '.jpg' would miss other extensions.
    return os.path.splitext(filename.split('/')[-1])[0]


OFIQ_c['image_name'] = OFIQ_c['Filename'].apply(_image_name_from_filename)
OFIQ_a['image_name'] = OFIQ_a['Filename'].apply(_image_name_from_filename)
OFIQ = pd.concat([OFIQ_a,OFIQ_c])

# Attach the age estimate and the selected OFIQ scores to every image (left joins keep
# every row of DF; images without a match get NaN).
# NOTE(review): a left join repeats a DF row for every duplicate image_name on the right
# side. The displayed outputs (40607 files vs. 40680 result rows) suggest that this
# happens — confirm, and de-duplicate age_df / OFIQ on image_name if it is unintended.
final_adult = pd.merge(DF, age_df, on='image_name', how='left')
final_adult = final_adult.merge(OFIQ[['image_name', 'UnifiedQualityScore.scalar', 'HeadPoseYaw.scalar','HeadPosePitch.scalar','HeadPoseRoll.scalar']],
                                on='image_name',
                                how='left')


In [48]:
final_adult

Unnamed: 0,files_list,image_name,identity_name,enrolled,ethnicity,Age,Identity,UnifiedQualityScore.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar
0,African_m.010lz5/m.010lz5_0001.jpg,m.010lz5_0001,m.010lz5,enrolled,African,38,m.010lz5,35,100,100,100
1,African_m.010lz5/m.010lz5_0002.jpg,m.010lz5_0002,m.010lz5,enrolled,African,29,m.010lz5,40,76,81,97
2,African_m.010lz5/m.010lz5_0003.jpg,m.010lz5_0003,m.010lz5,enrolled,African,37,m.010lz5,34,99,76,100
3,African_m.011y5k/m.011y5k_0001.jpg,m.011y5k_0001,m.011y5k,enrolled,African,40,m.011y5k,5,90,100,100
4,African_m.011y5k/m.011y5k_0002.jpg,m.011y5k_0002,m.011y5k,enrolled,African,43,m.011y5k,51,100,100,100
...,...,...,...,...,...,...,...,...,...,...,...
40676,Indian_m.0vshsz9/m.0vshsz9_0003.jpg,m.0vshsz9_0003,m.0vshsz9,enrolled,Indian,25,m.0vshsz9,76,99,96,100
40677,Indian_m.0_4pw/m.0_4pw_0001.jpg,m.0_4pw_0001,m.0_4pw,enrolled,Indian,38,m.0_4pw,28,100,100,100
40678,Indian_m.0_4pw/m.0_4pw_0002.jpg,m.0_4pw_0002,m.0_4pw,enrolled,Indian,64,m.0_4pw,16,90,100,100
40679,Indian_m.0_4pw/m.0_4pw_0003.jpg,m.0_4pw_0003,m.0_4pw,enrolled,Indian,37,m.0_4pw,24,89,99,100


In [49]:
# Write the mated-adults table to the current working directory, without the index column.
final_adult.to_csv('mated_adults_image_info.csv', index=False)

In [11]:
# compare with RFW_full_info_excluding_children.csv, only keep what matched

# Nonmated adults


In [31]:
# Same dataset folder as for the mated adults, addressed via a relative path here.
adults_dir = '../../data/data_full/'
# Element [1] = files from identity folders that hold exactly one image (the non-mated set).
# NOTE(review): the length displayed below is 0 — presumably no adult identity folder
# holds exactly one image; confirm that the relative path resolved as intended.
files_list = list_files_from_subfolders_RFW_full(adults_dir)[1]
# File names without directory and extension, in the same order as files_list.
image_names = extract_unique_identifiers(files_list)


In [34]:
len(files_list)

0

In [33]:
OFIQ

Unnamed: 0,Filename,UnifiedQualityScore,BackgroundUniformity,IlluminationUniformity,LuminanceMean,LuminanceVariance,UnderExposurePrevention,OverExposurePrevention,DynamicRange,Sharpness,...,DownwardCropOfTheFaceImage.scalar,UpwardCropOfTheFaceImage.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar,ExpressionNeutrality.scalar,NoHeadCoverings.scalar,Unnamed: 57,image_name,identity_name
0,../../../data/tests/images/children_from_RFW_t...,19.218674,129.863206,0.397959,0.236675,0.025831,0.262599,0.0,6.828580,-37,...,2,0,55,52,74,67,100,,m.010g87_0001,
1,../../../data/tests/images/children_from_RFW_t...,24.672916,100.015695,0.253444,0.228484,0.033573,0.325271,0.0,7.000783,-42,...,31,10,96,90,100,88,100,,m.0112yl_0003,
2,../../../data/tests/images/children_from_RFW_t...,22.201094,112.332005,0.331019,0.430565,0.042432,0.030190,0.0,7.483033,-42,...,14,4,96,89,99,41,100,,m.01167d3z_0002,
3,../../../data/tests/images/children_from_RFW_t...,20.812151,34.957952,0.365917,0.299761,0.056745,0.226444,0.0,7.174050,-33,...,19,4,98,97,100,77,100,,m.01167d3z_0004,
4,../../../data/tests/images/children_from_RFW_t...,23.644115,34.983350,0.068871,0.354744,0.032225,0.117435,0.0,7.324517,-37,...,16,3,99,98,100,53,100,,m.011xl3_0003,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9883,../../../data/tests/images/children_full_moved...,20.684898,68.836203,0.023669,0.227287,0.020213,0.182460,0.0,6.984332,8,...,1,0,99,98,100,76,100,,African_665_1,African_665
9884,../../../data/tests/images/children_full_moved...,17.902210,239.310779,0.300154,0.180025,0.005550,0.142604,0.0,6.254417,47,...,4,0,99,83,100,17,100,,African_666_1,African_666
9885,../../../data/tests/images/children_full_moved...,19.832048,86.813099,0.241512,0.215531,0.017044,0.114884,0.0,6.760029,42,...,1,0,99,96,100,9,100,,African_666_2,African_666
9886,../../../data/tests/images/children_full_moved...,20.824480,50.255038,0.399449,0.260129,0.012412,0.036529,0.0,6.607944,-40,...,1,0,92,100,100,11,100,,African_666_3,African_666


In [22]:

# One identity per image: cut each image name at the underscore before its "000N" counter.
identity_names = []
for i in image_names:
    identity_name = drop_after_zeros(i)

    identity_names.append(identity_name)

# Base table for the non-mated adults; the text before the first '_' of the
# file path is used as the ethnicity.
DF = pd.DataFrame(
    {'files_list': files_list,
     'image_name': image_names,
     'identity_name': identity_names,
     'enrolled' : 'non_enrolled'})
DF['ethnicity'] = DF['files_list'].apply(lambda x: x.split('_')[0] if isinstance(x, str) and x.split() else None)

# Left-join the age estimates and the selected OFIQ scores (age_df and OFIQ come from earlier cells).
# NOTE: this reassigns `final_adult`, which earlier held the mated-adults table.
final_adult = pd.merge(DF, age_df, on='image_name', how='left')
final_adult_non = final_adult.merge(OFIQ[['image_name', 'UnifiedQualityScore.scalar', 'HeadPoseYaw.scalar','HeadPosePitch.scalar','HeadPoseRoll.scalar']],
                                on='image_name',
                                how='left')
# Written to the current working directory, without the index column.
final_adult_non.to_csv('nonmated_adults_image_info.csv', index=False)


In [50]:
final_adult_non

Unnamed: 0,files_list,identity_name,enrolled,ethnicity,Age,Identity,image_name,UnifiedQualityScore.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar


# Mated children

In [51]:
# Folder with one sub-folder per child identity.
child_dir = '../../data/data_full/children_full'
# files_list = list_files_from_subfolders(child_dir)
# Element [0] = "<folder>/<file>" entries from folders that hold more than one image (the mated set).
files_list_child = list_files_from_folders_with_multiple_files_child(child_dir)[0]

# File names without directory and extension, in the same order as files_list_child.
image_names = extract_unique_identifiers(files_list_child)

# NOTE(review): identity_names is filled here but not used for DF_child below;
# the identity column is derived from image_name instead.
identity_names = []
for i in image_names:
    identity_names.append(drop_after_zeros(i))
DF_child = pd.DataFrame(
    {'files_list': files_list_child,
     'image_name': image_names,
     'enrolled' : 'enrolled'})

# Identity = image name without its last '_<suffix>' part; ethnicity = text before the first '_' of the path.
DF_child['identity_name'] = DF_child['image_name'].apply(lambda x: '_'.join(x.split('_')[:-1]) if isinstance(x, str) and x.split() else None)
DF_child['ethnicity'] = DF_child['files_list'].apply(lambda x: x.split('_')[0] if isinstance(x, str) and x.split() else None)
# Left-join the age estimates and the selected OFIQ scores (age_df and OFIQ come from earlier cells);
# images without a match keep NaN in the joined columns.
final_child = pd.merge(DF_child, age_df, on='image_name', how='left')
final_child_mated = final_child.merge(OFIQ[['image_name', 'UnifiedQualityScore.scalar', 'HeadPoseYaw.scalar','HeadPosePitch.scalar','HeadPoseRoll.scalar']],
                                on='image_name',
                                how='left')


In [54]:
final_child_mated

Unnamed: 0,files_list,image_name,enrolled,identity_name,ethnicity,Age,Identity,UnifiedQualityScore.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar
0,African_0/African_0_0.png,African_0_0,enrolled,African_0,African,7,African_0,34.0,100.0,93.0,100.0
1,African_0/African_0_1.png,African_0_1,enrolled,African_0,African,18,African_0,34.0,93.0,100.0,100.0
2,African_1/African_1_11.png,African_1_11,enrolled,African_1,African,5,African_1,65.0,100.0,99.0,100.0
3,African_1/African_1_4.png,African_1_4,enrolled,African_1,African,2,African_1,23.0,95.0,99.0,100.0
4,African_10/African_10_1.png,African_10_1,enrolled,African_10,African,5,African_10,50.0,100.0,97.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...
9099,Indian_m.0j63px2/m.0j63px2_0004.jpg,m.0j63px2_0004,enrolled,m.0j63px2,Indian,15,m.0j63px2,24.0,64.0,98.0,99.0
9100,Indian_m.0jfgrjr/m.0jfgrjr_0001.jpg,m.0jfgrjr_0001,enrolled,m.0jfgrjr,Indian,16,m.0jfgrjr,74.0,92.0,100.0,100.0
9101,Indian_m.0jfgrjr/m.0jfgrjr_0004.jpg,m.0jfgrjr_0004,enrolled,m.0jfgrjr,Indian,16,m.0jfgrjr,84.0,92.0,100.0,100.0
9102,Indian_m.0jwwdq1/m.0jwwdq1_0001.jpg,m.0jwwdq1_0001,enrolled,m.0jwwdq1,Indian,16,m.0jwwdq1,46.0,91.0,97.0,100.0


In [53]:
# count NaN in final_child_mated['UnifiedQualityScore.scalar']
final_child_mated['UnifiedQualityScore.scalar'].isna().sum()

33

In [55]:
# Write the mated-children table to the current working directory, without the index column.
final_child_mated.to_csv('mated_children_image_info.csv', index=False)

# Non mated children

In [56]:
# Folder with one sub-folder per child identity (absolute path this time).
child_dir = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_full'
# Element [1] = "<folder>/<file>" entries from folders that hold exactly one image (the non-mated set).
files_list_child = list_files_from_folders_with_multiple_files_child(child_dir)[1]

# File names without directory and extension, in the same order as files_list_child.
image_names = extract_unique_identifiers(files_list_child)

# NOTE(review): identity_names is filled here but not used for DF_child below;
# the identity column is derived from image_name instead.
identity_names = []
for i in image_names:
    identity_names.append(drop_after_zeros(i))
DF_child = pd.DataFrame(
    {'files_list': files_list_child,
     'image_name': image_names,
     'enrolled' : 'non_enrolled'})

# Identity = image name without its last '_<suffix>' part; ethnicity = text before the first '_' of the path.
DF_child['identity_name'] = DF_child['image_name'].apply(lambda x: '_'.join(x.split('_')[:-1]) if isinstance(x, str) and x.split() else None)
DF_child['ethnicity'] = DF_child['files_list'].apply(lambda x: x.split('_')[0] if isinstance(x, str) and x.split() else None)
# Left-join the age estimates and the selected OFIQ scores (age_df and OFIQ come from earlier cells).
final_child = pd.merge(DF_child, age_df, on='image_name', how='left')
final_child_non = final_child.merge(OFIQ[['image_name', 'UnifiedQualityScore.scalar', 'HeadPoseYaw.scalar','HeadPosePitch.scalar','HeadPoseRoll.scalar']],
                                on='image_name',
                                how='left')
# Written to the current working directory, without the index column.
final_child_non.to_csv('nonmated_children_image_info.csv', index=False)

In [57]:
final_child_non

Unnamed: 0,files_list,image_name,enrolled,identity_name,ethnicity,Age,Identity,UnifiedQualityScore.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar
0,African_110/African_110_0.png,African_110_0,non_enrolled,African_110,African,14,African_110,29.0,100.0,89.0,100.0
1,African_111/African_111_0.png,African_111_0,non_enrolled,African_111,African,21,African_111,30.0,100.0,100.0,100.0
2,African_122/African_122_0.png,African_122_0,non_enrolled,African_122,African,7,African_122,44.0,100.0,99.0,100.0
3,African_123/African_123_0.png,African_123_0,non_enrolled,African_123,African,4,African_123,23.0,87.0,94.0,99.0
4,African_124/African_124_0.png,African_124_0,non_enrolled,African_124,African,7,African_124,8.0,99.0,80.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...
818,Indian_m.0hht8wn/m.0hht8wn_0001.jpg,m.0hht8wn_0001,non_enrolled,m.0hht8wn,Indian,17,m.0hht8wn,94.0,98.0,99.0,100.0
819,Indian_m.0hnbj9t/m.0hnbj9t_0001.jpg,m.0hnbj9t_0001,non_enrolled,m.0hnbj9t,Indian,18,m.0hnbj9t,27.0,100.0,95.0,100.0
820,Indian_m.0j4c85h/m.0j4c85h_0006.jpg,m.0j4c85h_0006,non_enrolled,m.0j4c85h,Indian,17,m.0j4c85h,85.0,85.0,98.0,100.0
821,Indian_m.0k3208/m.0k3208_0001.jpg,m.0k3208_0001,non_enrolled,m.0k3208,Indian,18,m.0k3208,51.0,94.0,99.0,100.0


# Add image_name and identity_name to OFIQ

In [58]:
# OFIQ_a = pd.read_csv('../../data/OFIQ_results/table_adults_all_in_one.csv', sep=',')
# OFIQ_c = pd.read_csv('../../data/OFIQ_results/OFIQ_children_all.csv', sep=',')


In [59]:
# OFIQ_c['image_name']= OFIQ_c['Filename'].apply(lambda x: x.split('/')[-1])
# OFIQ_c['image_name']= OFIQ_c['image_name'].apply(lambda x: x.split('.')[0])
# OFIQ_a['image_name']=OFIQ_a['Filename'].apply(lambda x: x.split('/')[-1])
# OFIQ_a['image_name']=OFIQ_a['image_name'].apply(lambda x: x.split('.jpg')[0])


In [50]:
# def extract_identifier(filepath):
#     filename = filepath.split('/')[-1]
#     parts = filename.split('.')
#     if parts[-2].isdigit():
#         identifier = '.'.join(parts[-3:-1])
#     else:
#         identifier = '.'.join(parts[:-1])  # Join everything except the extension
#     return identifier


# OFIQ_a['identity_name'] = OFIQ_a['Filename'].apply(extract_identifier)
# OFIQ_a['identity_name'] = OFIQ_a['identity_name'].str[:-5]

In [25]:
# Write OFIQ_a to the current working directory (not to ../../data/OFIQ_results), without the index.
# NOTE(review): in the mated-adults cell OFIQ_a is built as the concatenation of
# table_adults_missing.csv and table_adults_all_in_one.csv, so this file presumably holds
# more rows than the table_adults_all_in_one.csv that was read — confirm before replacing it.
OFIQ_a.to_csv('table_adults_all_in_one.csv', index=False)

In [26]:
OFIQ_a

Unnamed: 0,Filename,UnifiedQualityScore,BackgroundUniformity,IlluminationUniformity,LuminanceMean,LuminanceVariance,UnderExposurePrevention,OverExposurePrevention,DynamicRange,Sharpness,...,DownwardCropOfTheFaceImage.scalar,UpwardCropOfTheFaceImage.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar,ExpressionNeutrality.scalar,NoHeadCoverings.scalar,Unnamed: 57,image_name,identity_name
0,../../../data/tests/images/children_from_RFW_t...,19.218674,129.863206,0.397959,0.236675,0.025831,0.262599,0.000000,6.828580,-37,...,2,0,55,52,74,67,100,,m.010g87_0001,
1,../../../data/tests/images/children_from_RFW_t...,24.672916,100.015695,0.253444,0.228484,0.033573,0.325271,0.000000,7.000783,-42,...,31,10,96,90,100,88,100,,m.0112yl_0003,
2,../../../data/tests/images/children_from_RFW_t...,22.201094,112.332005,0.331019,0.430565,0.042432,0.030190,0.000000,7.483033,-42,...,14,4,96,89,99,41,100,,m.01167d3z_0002,
3,../../../data/tests/images/children_from_RFW_t...,20.812151,34.957952,0.365917,0.299761,0.056745,0.226444,0.000000,7.174050,-33,...,19,4,98,97,100,77,100,,m.01167d3z_0004,
4,../../../data/tests/images/children_from_RFW_t...,23.644115,34.983350,0.068871,0.354744,0.032225,0.117435,0.000000,7.324517,-37,...,16,3,99,98,100,53,100,,m.011xl3_0003,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36437,../../../data/tests/images/adults_full_final/m...,22.174162,198.838779,0.625390,0.302620,0.035789,0.109209,0.000000,7.330758,-24,...,6,1,91,99,100,12,100,,m.0zwh1k1_0003,m.0zwh1k1
36438,../../../data/tests/images/adults_full_final/m...,24.823811,168.138308,0.522373,0.293775,0.009479,0.033023,0.000000,6.655987,-41,...,11,2,89,97,100,13,100,,m.0zwh1k1_0004,m.0zwh1k1
36439,../../../data/tests/images/adults_full_final/m...,18.592001,66.181583,0.542700,0.705963,0.052286,0.001681,0.153326,6.642255,-34,...,16,3,90,100,100,57,100,,m.0_4pw_0002,m.0_4pw
36440,../../../data/tests/images/adults_full_final/m...,20.047659,34.442842,0.392734,0.258039,0.041665,0.294725,0.000000,7.303942,-9,...,47,15,89,99,100,68,100,,m.0_4pw_0003,m.0_4pw


In [60]:
# def extract_identifier(filepath):
#     filename = filepath.split('/')[-1]
#     # Split the filename by underscores and join all parts except the last one
#     parts = filename.split('_')
#     identifier = '_'.join(parts[:-1])  # Join all parts except the last one to form the identifier
#     return identifier
# OFIQ_c['identity_name'] = OFIQ_c['Filename'].apply(extract_identifier)

In [27]:
OFIQ_c

Unnamed: 0,Filename,UnifiedQualityScore,BackgroundUniformity,IlluminationUniformity,LuminanceMean,LuminanceVariance,UnderExposurePrevention,OverExposurePrevention,DynamicRange,Sharpness,...,DownwardCropOfTheFaceImage.scalar,UpwardCropOfTheFaceImage.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar,ExpressionNeutrality.scalar,NoHeadCoverings.scalar,Unnamed: 57,image_name,identity_name
0,../../../data/tests/images/children_full_moved...,21.233467,162.705495,0.329640,0.084315,0.006262,0.716862,0.0,5.723622,47,...,5,0,100,93,100,14,100,,African_0_0,African_0
1,../../../data/tests/images/children_full_moved...,21.258804,90.363855,0.609568,0.108497,0.010054,0.592055,0.0,6.148537,47,...,3,0,93,100,100,9,100,,African_0_1,African_0
2,../../../data/tests/images/children_full_moved...,19.901749,94.363332,0.240000,0.135628,0.018415,0.502809,0.0,6.472321,40,...,1,0,97,90,99,18,100,,African_100_1,African_100
3,../../../data/tests/images/children_full_moved...,20.915359,139.023632,0.291454,0.152952,0.020221,0.469387,0.0,6.621154,35,...,1,0,100,98,100,74,100,,African_100_3,African_100
4,../../../data/tests/images/children_full_moved...,24.882755,117.132812,0.018519,0.193522,0.026933,0.412887,0.0,6.908666,42,...,1,0,100,96,100,96,100,,African_101_0,African_101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9883,../../../data/tests/images/children_full_moved...,20.684898,68.836203,0.023669,0.227287,0.020213,0.182460,0.0,6.984332,8,...,1,0,99,98,100,76,100,,African_665_1,African_665
9884,../../../data/tests/images/children_full_moved...,17.902210,239.310779,0.300154,0.180025,0.005550,0.142604,0.0,6.254417,47,...,4,0,99,83,100,17,100,,African_666_1,African_666
9885,../../../data/tests/images/children_full_moved...,19.832048,86.813099,0.241512,0.215531,0.017044,0.114884,0.0,6.760029,42,...,1,0,99,96,100,9,100,,African_666_2,African_666
9886,../../../data/tests/images/children_full_moved...,20.824480,50.255038,0.399449,0.260129,0.012412,0.036529,0.0,6.607944,-40,...,1,0,92,100,100,11,100,,African_666_3,African_666


In [28]:
# Write OFIQ_c (including its image_name column) to the current working directory, without the index.
OFIQ_c.to_csv('OFIQ_children_all.csv', index=False)