### Import Libraries

In [1]:
import pandas as pd
import os

### Rename the Dataset Images

In [2]:
# Get the root directory path
root_path = os.getcwd()

# Get the absolute adience directory path
adience_path = os.path.join(root_path, "adience")

# Loop through all the image directories in adience dataset
for entry_name in os.listdir(adience_path):

    image_dir = os.path.join(adience_path, entry_name)

    # Plain files sitting next to the image directories are left untouched
    if not os.path.isdir(image_dir):
        continue

    # Rename each image in the directory: everything up to and including the
    # second '.' is dropped, so "<a>.<b>.<rest>" becomes "<rest>"
    for image_filename in os.listdir(image_dir):
        name_parts = image_filename.split(sep='.', maxsplit=2)
        target_filename = name_parts[-1]

        # Fewer than three parts means there is no two-part prefix to strip
        # (already renamed images, or stray files such as "Thumbs.db"), and a
        # bare "jpg" remainder would discard the whole base name: skip both.
        if len(name_parts) < 3 or target_filename == "jpg":
            continue

        old_name = os.path.join(image_dir, image_filename)
        new_name = os.path.join(image_dir, target_filename)

        # Several source files can map to the same target name; the last one
        # processed wins. os.replace overwrites an existing target on every
        # platform (os.rename only does so on POSIX), so no bare `except` is
        # needed and unrelated OS errors are no longer hidden.
        is_duplicate = os.path.exists(new_name)
        os.replace(old_name, new_name)
        if is_duplicate:
            print("removed duplicate")

### Import Dataset Metadata

In [3]:
# Specify the age group map: each age-range label is assigned its position
# in this ordered list as the class index
age_group_labels = [
    '(0, 2)',
    '(4, 6)',
    '(8, 12)',
    '(15, 20)',
    '(25, 32)',
    '(38, 43)',
    '(48, 53)',
    '(60, 100)',
]
age_group_map = {label: index for index, label in enumerate(age_group_labels)}

# Specify the gender map: 'f' -> 0, 'm' -> 1
gender_map = {label: index for index, label in enumerate(['f', 'm'])}

In [4]:
path = []
age = []
gender = []

# The metadata is split across the 5 cross-validation fold files
# (fold_0_data.txt .. fold_4_data.txt); every fold must be read for the
# merged dataset to be complete.
NUM_FOLDS = 5

# Read the 5-fold-cross-validation metadata txt files and merge together
for i in range(NUM_FOLDS):
    with open(f"./adience/fold_{i}_data.txt") as file:
        # Iterate the file lazily instead of loading every line up front
        for line in file:
            line_data = line.split()

            # Skip blank or truncated lines: the gender token is read from
            # index 5, so at least six tokens are required
            if len(line_data) < 6: continue

            # Skip header line
            if line_data[0] == 'user_id': continue

            # The first two tokens form the image path relative to the
            # adience directory
            p = "/".join(line_data[:2])
            # Whitespace splitting breaks an age label such as "(25, 32)" into
            # two tokens, so tokens 3 and 4 are re-joined to match the keys of
            # age_group_map
            a = " ".join(line_data[3:5])
            g = line_data[5]

            # Keep only rows whose image exists on disk and whose gender and
            # age labels are both known to the maps
            if not os.path.isfile(os.path.join(adience_path, p)): continue
            if g not in gender_map or a not in age_group_map: continue

            path.append(p)
            age.append(a)
            gender.append(g)


In [5]:
# Assemble the collected metadata lists into a single dataframe,
# one row per image (columns: image_path, gender, age)
df = pd.DataFrame(data=dict(image_path=path, gender=gender, age=age))

In [6]:
# Preprocessing the df data
# Encode the labels from the raw `gender` / `age` lists the dataframe was
# built from, not from the dataframe's own columns. The cell can then be
# re-run safely; mapping already-encoded columns would raise KeyError.
# `assign` raises ValueError if the lists and the dataframe differ in length.
df = df.assign(
    gender=[gender_map[g] for g in gender],
    age=[age_group_map[a] for a in age],
)

In [9]:
# Write the preprocessed dataframe to a CSV file, omitting the row index
preprocessed_csv_path = "./adience/preprocessed_data.csv"
df.to_csv(path_or_buf=preprocessed_csv_path, index=False)