In [1]:
# First, change the folder structure with move_data.ipynb

# Mated adults 



In [25]:
import os

def list_files_from_subfolders(root_dir):
    """
    List files from subdirectories within the 'adults' root directory which contain more than one subdirectory.

    Parameters:
        root_dir (str): The root directory containing the 'adults' directory.

    Returns:
        list of str: Filenames prefixed with their immediate parent directory name from subdirectories that contain more than one subdirectory.
    """
    all_files = []  # List to store filenames
    single_files=[]
    # Check if the root_dir exists and is a directory
    if not os.path.isdir(root_dir):
        print(f"The path {root_dir} is not a valid directory.")
        return all_files

    # Iterate over the items in the 'adults' directory
    for item in os.listdir(root_dir):
        item_path = os.path.join(root_dir, item)
        # Proceed only if the item is a directory
        if os.path.isdir(item_path):
            subdirs = [d for d in os.listdir(item_path) if os.path.isdir(os.path.join(item_path, d))]
            # If the current directory contains more than one subdirectory
            if len(subdirs) > 1:
                # Iterate over each subdirectory
                for subdir in subdirs:
                    subdir_path = os.path.join(item_path, subdir)
                    # Add the file names within this subdirectory to the list
                    files = [os.path.join(item, subdir, f) for f in os.listdir(subdir_path) if os.path.isfile(os.path.join(subdir_path, f))]
                    all_files.extend(files)
            elif len(subdirs)==1:
                for subdir in subdirs:
                    subdir_path = os.path.join(item_path, subdir)
                    # Add the file names within this subdirectory to the list
                    files = [os.path.join(item, subdir, f) for f in os.listdir(subdir_path) if os.path.isfile(os.path.join(subdir_path, f))]
                    single_files.extend(files)

    return all_files, single_files



def list_files_from_folders_with_multiple_files_child(root_dir):
    """
    List files from folders within the 'children' root directory that contain more than one file.

    Parameters:
        root_dir (str): The root directory containing the 'children' directory.

    Returns:
        list of str: Strings representing each file in the folders that contain more than one file.
    """
    folder_files_list = []  # List to store folder/file strings
    single_files_list = []

    # Iterate over the items in the 'children' directory
    for folder_name in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder_name)
        # Proceed only if the item is a directory
        if os.path.isdir(folder_path):
            files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
            # Proceed only if the folder contains more than one file
            if len(files) > 1:
                # Create a string for each file
                for file in files:
                    folder_files_list.append(f"{folder_name}/{file}")
            elif len(files) == 1:
                for file in files:
                    single_files_list.append(f"{folder_name}/{file}")
    return folder_files_list,single_files_list





def list_files_from_subfolders_RFW_full(root_dir):
    """
    List files from subdirectories within the 'adults_full' root directory. If a subdirectory contains more than one file,
    these files are added to the `all_files` list. If a subdirectory contains exactly one file, this file is added to
    the `single_files` list.

    Parameters:
        root_dir (str): The root directory containing the 'adults_full' directory.

    Returns:
        tuple: A tuple containing two lists:
               - `all_files` with filenames from directories that contain multiple files,
               - `single_files` with filenames from directories that contain exactly one file.
    """
    all_files = []  # List to store filenames from directories with multiple files
    single_files = []  # List to store filenames from directories with exactly one file

    # Path to the 'adults_full' directory
    adults_full_path = os.path.join(root_dir, 'data_adults_full')
    # Check if the adults_full_path exists and is a directory
    if not os.path.isdir(adults_full_path):
        print(f"The path {adults_full_path} is not a valid directory.")
        return all_files, single_files

    # Iterate over the items in the 'adults_full' directory
    for item in os.listdir(adults_full_path):
        item_path = os.path.join(adults_full_path, item)
        # Proceed only if the item is a directory
        if os.path.isdir(item_path):
            # Get all files within this directory
            files = [f for f in os.listdir(item_path) if os.path.isfile(os.path.join(item_path, f))]
            # Categorize files based on count in their directory
            if len(files) > 1:
                all_files.extend(os.path.join(item, f) for f in files)
            elif len(files) == 1:
                single_files.append(os.path.join(item, files[0]))

    return all_files, single_files


In [22]:
import os
import shutil

def move_and_rename_data_folders(base_path):
    # List of the racial folders to process
    racial_folders = ['African', 'Caucasian', 'Asian', 'Indian']

    # Iterate through each racial folder
    for race in racial_folders:
        race_path = os.path.join(base_path, race)

        # Check if the folder exists
        if os.path.exists(race_path):
            # List all subfolders in the racial folder
            subfolders = [f for f in os.listdir(race_path) if os.path.isdir(os.path.join(race_path, f))]

            # Move and rename each subfolder
            for folder in subfolders:
                old_path = os.path.join(race_path, folder)
                new_name = f"{race}_{folder}"
                new_path = os.path.join(base_path, new_name)

                # Move the folder to the new location with the new name
                shutil.move(old_path, new_path)
                print(f"Moved and renamed {old_path} to {new_path}")
        else:
            print(f"No folder found for {race}")

# Example usage
base_directory = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data'
move_and_rename_data_folders(base_directory)


Moved and renamed /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data/African/m.010lz5 to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data/African_m.010lz5
Moved and renamed /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data/African/m.011y5k to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data/African_m.011y5k
Moved and renamed /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data/African/m.01257s to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data/African_m.01257s
Moved and renamed /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data/African/m.0126y2 to /mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/data/African_m.0126y2
Moved and renamed /mnt/c/Dokumenter/

In [23]:
def extract_unique_identifiers(file_list):
    """
    Extracts a unique identifier from a list of file paths.

    Parameters:
        file_list (list): A list of strings containing file paths.

    Returns:
        list: A list of unique identifiers extracted from the file paths.
    """
    identifiers = []
    for file_path in file_list:
        # Extract the base filename without extension and path
        base_name = os.path.splitext(os.path.basename(file_path))[0]
        identifiers.append(base_name)

    return identifiers


def drop_after_zeros(s):
    """
    Drops everything from the input string after the first occurrence of three consecutive zeros
    and also drops the last underscore before the zeros.

    Parameters:
        s (str): The input string.

    Returns:
        str: The string up to, but not including, the last underscore before the three consecutive zeros.
    """
    # Find the index of the first occurrence of three consecutive zeros
    zero_index = s.find('000')

    # If three consecutive zeros are found
    if zero_index != -1:
        # Find the last underscore before the '000'
        underscore_index = s.rfind('_', 0, zero_index)
        # If an underscore is found, return the substring up to that point
        if underscore_index != -1:
            return s[:underscore_index]
        else:
            return s[:zero_index]
    else:
        return s  # If no '000' sequence is found, return the original string

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import os
import json
import numpy as np
def load_data(path):
    f = open(path)
    data = json.load(f)
    df = pd.DataFrame.from_dict(data, orient='index', columns=['Age'])
    f.close()

    # removing nans
    nans = df['Age'].isna().sum()
    df = df.dropna()

    describe = df.describe()
    value_counts = df.value_counts()

    # make new column for the identity based on the index
    df['Identity'] = df.index
    df['Identity'] = df.index.str.rsplit('_', 1).str[0]

    df = df.sort_values(by='Age')

    return df, nans, describe, value_counts

import pandas as pd

def drop_number_after_last_underscore(df, column_name):
    """
    Drops the number after the last underscore from the string in the specified column of a DataFrame.

    Parameters:
        df (pd.DataFrame): The DataFrame containing the column to process.
        column_name (str): The name of the column to process.

    Returns:
        pd.DataFrame: The DataFrame with the modified column.
    """
    # Check if the column exists in the DataFrame
    if column_name in df.columns:
        # Split the string by underscore and drop the last part
        df[column_name] = df[column_name].apply(lambda x: '_'.join(x.split('_')[:-1]))
    else:
        raise ValueError(f"The column {column_name} does not exist in the DataFrame.")

    return df


In [24]:
df_YLFW = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/YLFW_output_data_precroped_all_age.json')[0]
df_RFW_african = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/RFW_age_estimations/output_african.json')[0]
df_RFW_asian = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/RFW_age_estimations/output_asian.json')[0]
df_RFW_caucasian = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/RFW_age_estimations/output_caucasian.json')[0]
df_RFW_indian = load_data('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/age_estimations/RFW_age_estimations/output_indian.json')[0]
df_RFW = pd.concat([df_RFW_african, df_RFW_asian, df_RFW_caucasian, df_RFW_indian])
df_RFW.Age = df_RFW.Age.astype(int)
df_RFW = df_RFW.sort_values(by='Age', ascending=True)

age_df = pd.concat([df_YLFW,df_RFW])
age_df['image_name'] = age_df.index

# Mated adults

In [26]:

adults_dir = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/'
files_list = list_files_from_subfolders_RFW_full(adults_dir)[0]
image_names = extract_unique_identifiers(files_list)


In [21]:
files_list

['African_m.010lz5/m.010lz5_0001.jpg',
 'African_m.010lz5/m.010lz5_0002.jpg',
 'African_m.010lz5/m.010lz5_0003.jpg',
 'African_m.011y5k/m.011y5k_0001.jpg',
 'African_m.011y5k/m.011y5k_0002.jpg',
 'African_m.011y5k/m.011y5k_0003.jpg',
 'African_m.011y5k/m.011y5k_0004.jpg',
 'African_m.01257s/m.01257s_0001.jpg',
 'African_m.01257s/m.01257s_0002.jpg',
 'African_m.01257s/m.01257s_0003.jpg',
 'African_m.0126y2/m.0126y2_0001.jpg',
 'African_m.0126y2/m.0126y2_0002.jpg',
 'African_m.0126y2/m.0126y2_0003.jpg',
 'African_m.0126y2/m.0126y2_0004.jpg',
 'African_m.012f1d/m.012f1d_0001.jpg',
 'African_m.012f1d/m.012f1d_0002.jpg',
 'African_m.012f1d/m.012f1d_0003.jpg',
 'African_m.012gl8/m.012gl8_0001.jpg',
 'African_m.012gl8/m.012gl8_0002.jpg',
 'African_m.012gl8/m.012gl8_0003.jpg',
 'African_m.012gr9/m.012gr9_0001.jpg',
 'African_m.012gr9/m.012gr9_0002.jpg',
 'African_m.012mh_/m.012mh__0002.jpg',
 'African_m.012mh_/m.012mh__0003.jpg',
 'African_m.012mh_/m.012mh__0004.jpg',
 'African_m.012mh_/m.012m

In [27]:
len(files_list)

37015

In [28]:

identity_names = []
for i in image_names:
    identity_name = drop_after_zeros(i)

    identity_names.append(identity_name)

DF = pd.DataFrame(
    {'files_list': files_list,
     'image_name': image_names,
     'identity_name': identity_names,
     'enrolled' : 'enrolled'})
DF['ethnicity'] = DF['files_list'].apply(lambda x: x.split('_')[0] if isinstance(x, str) and x.split() else None)
OFIQ_a = pd.read_csv('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/OFIQ_results/table_adults_all_in_one.csv', sep=';')
OFIQ_c = pd.read_csv('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/OFIQ_results/OFIQ_children_all.csv', sep=';')
OFIQ_c['image_name']= OFIQ_c['Filename'].apply(lambda x: x.split('/')[-1])
OFIQ_c['image_name']= OFIQ_c['image_name'].apply(lambda x: x.split('.')[0])
OFIQ_a['image_name']=OFIQ_a['Filename'].apply(lambda x: x.split('/')[-1])
OFIQ_a['image_name']=OFIQ_a['image_name'].apply(lambda x: x.split('.jpg')[0])
OFIQ = pd.concat([OFIQ_a,OFIQ_c])

final_adult = pd.merge(DF, age_df, on='image_name', how='left')
final_adult = final_adult.merge(OFIQ[['image_name', 'UnifiedQualityScore.scalar', 'HeadPoseYaw.scalar','HeadPosePitch.scalar','HeadPoseRoll.scalar']],
                                on='image_name',
                                how='left')


In [29]:
final_adult

Unnamed: 0,files_list,image_name,identity_name,enrolled,ethnicity,Age,Identity,UnifiedQualityScore.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar
0,African_m.010lz5/m.010lz5_0001.jpg,m.010lz5_0001,m.010lz5,enrolled,African,38,m.010lz5,35.0,100.0,100.0,100.0
1,African_m.010lz5/m.010lz5_0002.jpg,m.010lz5_0002,m.010lz5,enrolled,African,29,m.010lz5,40.0,76.0,81.0,97.0
2,African_m.010lz5/m.010lz5_0003.jpg,m.010lz5_0003,m.010lz5,enrolled,African,37,m.010lz5,34.0,99.0,76.0,100.0
3,African_m.011y5k/m.011y5k_0001.jpg,m.011y5k_0001,m.011y5k,enrolled,African,40,m.011y5k,5.0,90.0,100.0,100.0
4,African_m.011y5k/m.011y5k_0002.jpg,m.011y5k_0002,m.011y5k,enrolled,African,43,m.011y5k,51.0,100.0,100.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...
37079,Indian_m.0vshsz9/m.0vshsz9_0002.jpg,m.0vshsz9_0002,m.0vshsz9,enrolled,Indian,36,m.0vshsz9,81.0,96.0,100.0,100.0
37080,Indian_m.0vshsz9/m.0vshsz9_0003.jpg,m.0vshsz9_0003,m.0vshsz9,enrolled,Indian,25,m.0vshsz9,76.0,99.0,96.0,100.0
37081,Indian_m.0_4pw/m.0_4pw_0002.jpg,m.0_4pw_0002,m.0_4pw,enrolled,Indian,64,m.0_4pw,16.0,90.0,100.0,100.0
37082,Indian_m.0_4pw/m.0_4pw_0003.jpg,m.0_4pw_0003,m.0_4pw,enrolled,Indian,37,m.0_4pw,24.0,89.0,99.0,100.0


In [30]:
final_adult.to_csv('mated_adults_image_info.csv', index=False)

In [11]:
# sammenlign med RFW_full_info_excluding_children.csv , only keep what matched

# Nonmated adults


In [31]:
adults_dir = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/'
files_list = list_files_from_subfolders_RFW_full(adults_dir)[1]
image_names = extract_unique_identifiers(files_list)


In [32]:
len(files_list)

284

In [35]:

identity_names = []
for i in image_names:
    identity_name = drop_after_zeros(i)

    identity_names.append(identity_name)

DF = pd.DataFrame(
    {'files_list': files_list,
     'image_name': image_names,
     'identity_name': identity_names,
     'enrolled' : 'non_enrolled'})
DF['ethnicity'] = DF['files_list'].apply(lambda x: x.split('_')[0] if isinstance(x, str) and x.split() else None)
OFIQ_a = pd.read_csv('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/OFIQ_results/table_adults_all_in_one.csv', sep=';')
OFIQ_c = pd.read_csv('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/OFIQ_results/OFIQ_children_all.csv', sep=';')
OFIQ_c['image_name']= OFIQ_c['Filename'].apply(lambda x: x.split('/')[-1])
OFIQ_c['image_name']= OFIQ_c['image_name'].apply(lambda x: x.split('.')[0])
OFIQ_a['image_name']=OFIQ_a['Filename'].apply(lambda x: x.split('/')[-1])
OFIQ_a['image_name']=OFIQ_a['image_name'].apply(lambda x: x.split('.jpg')[0])
OFIQ = pd.concat([OFIQ_a,OFIQ_c])



final_adult = pd.merge(DF, age_df, on='image_name', how='left')
final_adult_non = final_adult.merge(OFIQ[['image_name', 'UnifiedQualityScore.scalar', 'HeadPoseYaw.scalar','HeadPosePitch.scalar','HeadPoseRoll.scalar']],
                                on='image_name',
                                how='left')
final_adult_non.to_csv('nonmated_adults_image_info.csv', index=False)


In [36]:
final_adult_non

Unnamed: 0,files_list,image_name,identity_name,enrolled,ethnicity,Age,Identity,UnifiedQualityScore.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar
0,African_m.01jq08_/m.01jq08__0002.jpg,m.01jq08__0002,m.01jq08_,non_enrolled,African,38,m.01jq08_,29.0,95.0,89.0,100.0
1,African_m.01jr5w/m.01jr5w_0003.jpg,m.01jr5w_0003,m.01jr5w,non_enrolled,African,25,m.01jr5w,16.0,91.0,96.0,100.0
2,African_m.01l_04j/m.01l_04j_0001.jpg,m.01l_04j_0001,m.01l_04j,non_enrolled,African,42,m.01l_04j,6.0,100.0,73.0,100.0
3,African_m.01mqdhm/m.01mqdhm_0001.jpg,m.01mqdhm_0001,m.01mqdhm,non_enrolled,African,17,m.01mqdhm,,,,
4,African_m.01ng51t/m.01ng51t_0001.jpg,m.01ng51t_0001,m.01ng51t,non_enrolled,African,47,m.01ng51t,23.0,63.0,96.0,98.0
...,...,...,...,...,...,...,...,...,...,...,...
280,Indian_m.0j49qzy/m.0j49qzy_0001.jpg,m.0j49qzy_0001,m.0j49qzy,non_enrolled,Indian,36,m.0j49qzy,3.0,48.0,81.0,92.0
281,Indian_m.0jbc3/m.0jbc3_0002.jpg,m.0jbc3_0002,m.0jbc3,non_enrolled,Indian,43,m.0jbc3,58.0,99.0,95.0,100.0
282,Indian_m.0jkv_y7/m.0jkv_y7_0002.jpg,m.0jkv_y7_0002,m.0jkv_y7,non_enrolled,Indian,25,m.0jkv_y7,16.0,27.0,87.0,93.0
283,Indian_m.0j_9xt/m.0j_9xt_0001.jpg,m.0j_9xt_0001,m.0j_9xt,non_enrolled,Indian,49,m.0j_9xt,22.0,100.0,100.0,100.0


# Mated children

In [38]:
child_dir = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_full'
# files_list = list_files_from_subfolders(child_dir)
files_list_child = list_files_from_folders_with_multiple_files_child(child_dir)[0]

image_names = extract_unique_identifiers(files_list_child)

identity_names = []
for i in image_names:
    identity_names.append(drop_after_zeros(i))
DF_child = pd.DataFrame(
    {'files_list': files_list_child,
     'image_name': image_names,
     'enrolled' : 'enrolled'})

DF_child['identity_name'] = DF_child['image_name'].apply(lambda x: '_'.join(x.split('_')[:-1]) if isinstance(x, str) and x.split() else None)
DF_child['ethnicity'] = DF_child['files_list'].apply(lambda x: x.split('_')[0] if isinstance(x, str) and x.split() else None)
final_child = pd.merge(DF_child, age_df, on='image_name', how='left')
final_child_mated = final_child.merge(OFIQ[['image_name', 'UnifiedQualityScore.scalar', 'HeadPoseYaw.scalar','HeadPosePitch.scalar','HeadPoseRoll.scalar']],
                                on='image_name',
                                how='left')


In [39]:
final_child_mated

Unnamed: 0,files_list,image_name,enrolled,identity_name,ethnicity,Age,Identity,UnifiedQualityScore.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar
0,African_0/African_0_0.png,African_0_0,enrolled,African_0,African,7,African_0,34.0,100.0,93.0,100.0
1,African_0/African_0_1.png,African_0_1,enrolled,African_0,African,18,African_0,34.0,93.0,100.0,100.0
2,African_1/African_1_11.png,African_1_11,enrolled,African_1,African,5,African_1,65.0,100.0,99.0,100.0
3,African_1/African_1_4.png,African_1_4,enrolled,African_1,African,2,African_1,23.0,95.0,99.0,100.0
4,African_10/African_10_1.png,African_10_1,enrolled,African_10,African,5,African_10,50.0,100.0,97.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...
9099,Indian_m.0j63px2/m.0j63px2_0004.jpg,m.0j63px2_0004,enrolled,m.0j63px2,Indian,15,m.0j63px2,,,,
9100,Indian_m.0jfgrjr/m.0jfgrjr_0001.jpg,m.0jfgrjr_0001,enrolled,m.0jfgrjr,Indian,16,m.0jfgrjr,,,,
9101,Indian_m.0jfgrjr/m.0jfgrjr_0004.jpg,m.0jfgrjr_0004,enrolled,m.0jfgrjr,Indian,16,m.0jfgrjr,,,,
9102,Indian_m.0jwwdq1/m.0jwwdq1_0001.jpg,m.0jwwdq1_0001,enrolled,m.0jwwdq1,Indian,16,m.0jwwdq1,,,,


In [40]:
# count NaN in final_child_mated['UnifiedQualityScore.scalar']
final_child_mated['UnifiedQualityScore.scalar'].isna().sum()

468

In [41]:
final_child_mated.to_csv('mated_children_image_info.csv', index=False)

# Non mated children

In [42]:
child_dir = '/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/data_full/children_full'
files_list_child = list_files_from_folders_with_multiple_files_child(child_dir)[1]

image_names = extract_unique_identifiers(files_list_child)

identity_names = []
for i in image_names:
    identity_names.append(drop_after_zeros(i))
DF_child = pd.DataFrame(
    {'files_list': files_list_child,
     'image_name': image_names,
     'enrolled' : 'non_enrolled'})

DF_child['identity_name'] = DF_child['image_name'].apply(lambda x: '_'.join(x.split('_')[:-1]) if isinstance(x, str) and x.split() else None)
DF_child['ethnicity'] = DF_child['files_list'].apply(lambda x: x.split('_')[0] if isinstance(x, str) and x.split() else None)
final_child = pd.merge(DF_child, age_df, on='image_name', how='left')
final_child_non = final_child.merge(OFIQ[['image_name', 'UnifiedQualityScore.scalar', 'HeadPoseYaw.scalar','HeadPosePitch.scalar','HeadPoseRoll.scalar']],
                                on='image_name',
                                how='left')
final_child_non.to_csv('nonmated_children_image_info.csv', index=False)

In [43]:
final_child_non

Unnamed: 0,files_list,image_name,enrolled,identity_name,ethnicity,Age,Identity,UnifiedQualityScore.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar
0,African_110/African_110_0.png,African_110_0,non_enrolled,African_110,African,14,African_110,29.0,100.0,89.0,100.0
1,African_111/African_111_0.png,African_111_0,non_enrolled,African_111,African,21,African_111,30.0,100.0,100.0,100.0
2,African_122/African_122_0.png,African_122_0,non_enrolled,African_122,African,7,African_122,44.0,100.0,99.0,100.0
3,African_123/African_123_0.png,African_123_0,non_enrolled,African_123,African,4,African_123,23.0,87.0,94.0,99.0
4,African_124/African_124_0.png,African_124_0,non_enrolled,African_124,African,7,African_124,8.0,99.0,80.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...
818,Indian_m.0hht8wn/m.0hht8wn_0001.jpg,m.0hht8wn_0001,non_enrolled,m.0hht8wn,Indian,17,m.0hht8wn,,,,
819,Indian_m.0hnbj9t/m.0hnbj9t_0001.jpg,m.0hnbj9t_0001,non_enrolled,m.0hnbj9t,Indian,18,m.0hnbj9t,,,,
820,Indian_m.0j4c85h/m.0j4c85h_0006.jpg,m.0j4c85h_0006,non_enrolled,m.0j4c85h,Indian,17,m.0j4c85h,,,,
821,Indian_m.0k3208/m.0k3208_0001.jpg,m.0k3208_0001,non_enrolled,m.0k3208,Indian,18,m.0k3208,,,,


# Add image_name and identity_name to OFIQ

In [44]:
OFIQ_a = pd.read_csv('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/OFIQ_results/table_adults_all_in_one.csv', sep=';')
OFIQ_c = pd.read_csv('/mnt/c/Dokumenter/Dokumenter/UNI/Master/Thesis/GitHub_Repo/Master_Thesis/data/OFIQ_results/OFIQ_children_all.csv', sep=';')


In [45]:


OFIQ_c['image_name']= OFIQ_c['Filename'].apply(lambda x: x.split('/')[-1])
OFIQ_c['image_name']= OFIQ_c['image_name'].apply(lambda x: x.split('.')[0])
OFIQ_a['image_name']=OFIQ_a['Filename'].apply(lambda x: x.split('/')[-1])
OFIQ_a['image_name']=OFIQ_a['image_name'].apply(lambda x: x.split('.jpg')[0])


In [46]:
OFIQ_c.head()

Unnamed: 0,Filename,UnifiedQualityScore,BackgroundUniformity,IlluminationUniformity,LuminanceMean,LuminanceVariance,UnderExposurePrevention,OverExposurePrevention,DynamicRange,Sharpness,...,RightwardCropOfTheFaceImage.scalar,DownwardCropOfTheFaceImage.scalar,UpwardCropOfTheFaceImage.scalar,HeadPoseYaw.scalar,HeadPosePitch.scalar,HeadPoseRoll.scalar,ExpressionNeutrality.scalar,NoHeadCoverings.scalar,Unnamed: 57,image_name
0,../../../data/tests/images/children_full_moved...,21.233467,162.705495,0.32964,0.084315,0.006262,0.716862,0.0,5.723622,47,...,100,5,0,100,93,100,14,100,,African_0_0
1,../../../data/tests/images/children_full_moved...,21.258804,90.363855,0.609568,0.108497,0.010054,0.592055,0.0,6.148537,47,...,100,3,0,93,100,100,9,100,,African_0_1
2,../../../data/tests/images/children_full_moved...,19.901749,94.363332,0.24,0.135628,0.018415,0.502809,0.0,6.472321,40,...,100,1,0,97,90,99,18,100,,African_100_1
3,../../../data/tests/images/children_full_moved...,20.915359,139.023632,0.291454,0.152952,0.020221,0.469387,0.0,6.621154,35,...,100,1,0,100,98,100,74,100,,African_100_3
4,../../../data/tests/images/children_full_moved...,24.882755,117.132812,0.018519,0.193522,0.026933,0.412887,0.0,6.908666,42,...,100,1,0,100,96,100,96,100,,African_101_0


In [None]:

OFIQ = pd.concat([OFIQ_a,OFIQ_c])

In [None]:
OFIQ_a.to_csv('table_adults_all_in_one.csv', index=False)