In [2]:
import pandas as pd
import re
import os

In [2]:
def assign_range(age, ranges):
    """
    Map a raw age label onto the index of the predefined range containing it.

    The label is converted with ``str`` and parsed from its start, either as
    an interval such as "(8, 12)" or as a single whole number such as "35".
    Trailing characters after the parsed part are ignored.

    Parameters
    ----------
    age : object
        Raw age label; its string form must begin with an interval or with
        digits to be parsed.
    ranges : list
        List of ``(minimum, maximum)`` pairs; both bounds are inclusive.

    Returns
    -------
    int or None
        Index of the first range that fully contains the age (or the whole
        interval), or None if the label cannot be parsed or nothing matches.
    """
    label = str(age)

    interval = re.match(r"\((\d+), *(\d+)\)", label)
    if interval is not None:
        low, high = int(interval.group(1)), int(interval.group(2))
    else:
        single = re.match(r"(\d+)", label)
        if single is None:
            return None
        # A single age is handled as the degenerate interval [age, age].
        low = high = int(single.group(0))

    # The first range whose inclusive bounds enclose the whole interval wins.
    return next(
        (
            index
            for index, (range_minimum, range_maximum) in enumerate(ranges)
            if low >= range_minimum and high <= range_maximum
        ),
        None,
    )

def clean_fold_data(input_path, output_path, ranges=None):
    """
    Clean the fold data by handling age inconsistencies and save the cleaned file.

    The tab-separated input is read, rows whose ``age`` is missing or cannot
    be assigned to a range by ``assign_range`` are dropped, and the remaining
    ``age`` values are replaced by the integer index of their range.

    Parameters
    ----------
    input_path : str
        Path to the input txt file (tab-separated, with an ``age`` column).
    output_path : str
        Path to save the cleaned file (tab-separated, written without the index).
    ranges : list, optional
        List of predefined age ranges, by default
        [(0, 2), (4, 6), (8, 13), (15, 20), (25, 32), (38, 43), (48, 53), (60, 100)].

    Raises
    ------
    KeyError
        If the input file has no ``age`` column.
    """
    if ranges is None:
        ranges = [
            (0, 2),
            (4, 6),
            (8, 13),
            (15, 20),
            (25, 32),
            (38, 43),
            (48, 53),
            (60, 100),
        ]

    # Read the txt file
    fold_data = pd.read_csv(input_path, sep="\t")

    # Build the cleaned frame as one non-mutating chain. Assigning a column on
    # a boolean-filtered slice can emit SettingWithCopyWarning on pandas
    # versions without copy-on-write; `assign` returns a new frame instead.
    cleaned = (
        fold_data
        # Drop rows with NaN ages
        .dropna(subset=["age"])
        # Map age values to the index of their predefined range (None if no match)
        .assign(age=lambda frame: frame["age"].map(lambda age: assign_range(age, ranges)))
        # Drop rows whose age could not be assigned to any range
        .dropna(subset=["age"])
        # Convert age column to integer (index of ranges)
        .astype({"age": int})
    )

    # Save the cleaned file to the output path
    cleaned.to_csv(output_path, sep="\t", index=False)

In [6]:
# Clean folds 0-4: read each raw fold file and write a `clean_`-prefixed
# copy next to it via clean_fold_data (default age ranges).
for i in range(5):
    input_file = f"../datasets/Adience/folds/fold_{i}_data.txt"
    output_file = f"../datasets/Adience/folds/clean_fold_{i}_data.txt"
    clean_fold_data(input_file, output_file)

In [10]:
# Sanity check: for each cleaned fold file, print how many rows carry each
# value of the `age` column.
for i in range(5):
    input_file = f"../datasets/Adience/folds/clean_fold_{i}_data.txt"
    df = pd.read_csv(input_file, sep='\t')
    print(df['age'].value_counts())

age
4    1646
0     960
5     554
1     494
6     219
2     216
3     152
7     139
Name: count, dtype: int64
age
2    763
4    635
3    525
5    485
1    480
7    156
6    146
0     84
Name: count, dtype: int64
age
0    813
4    785
2    476
1    358
5    276
3    270
7    202
6    120
Name: count, dtype: int64
age
4    970
5    523
2    497
3    468
1    238
0    151
7    118
6    104
Name: count, dtype: int64
age
4    1059
1     570
5     502
0     483
2     340
7     257
6     241
3     227
Name: count, dtype: int64


In [13]:
# Load cleaned fold 0 and print it to inspect its columns and rows.
# The path is a plain string literal: there is nothing to interpolate.
input_file = "../datasets/Adience/folds/clean_fold_0_data.txt"
df = pd.read_csv(input_file, sep='\t')
print(df)

           user_id                original_image  face_id  age gender     x  \
0     30601258@N03  10399646885_67c7d20df9_o.jpg        1    4      f     0   
1     30601258@N03  10424815813_e94629b1ec_o.jpg        2    4      m   301   
2     30601258@N03  10437979845_5985be4b26_o.jpg        1    4      f  2395   
3     30601258@N03  10437979845_5985be4b26_o.jpg        3    4      m   752   
4     30601258@N03  11816644924_075c3d8d59_o.jpg        2    4      m   175   
...            ...                           ...      ...  ...    ...   ...   
4375  68094148@N04  11373907673_c6a3812b7b_o.jpg      478    4      m   637   
4376  68094148@N04  11373794746_4720ac792a_o.jpg      477    4      f  2919   
4377  68094148@N04  11373794746_4720ac792a_o.jpg      478    4      m   664   
4378  68094148@N04  11355711315_0f5b5da125_o.jpg      477    4      f   915   
4379  10693681@N00   9162730346_b1bf71120a_o.jpg      479    4      m  2145   

         y    dx    dy  tilt_ang  fiducial_yaw_angl

In [3]:
# Load `data_split1.csv` from the `dldl_v2` and `noisy_dldl_v2` dataset
# directories. `header=None` makes pandas treat the first line as data and
# label the columns with integers.
df1 = pd.read_csv("facebase/data/Adience_256x256_resnet50_imagenet_dldl_v2/data_split1.csv", header=None)
df2 = pd.read_csv("facebase/data/Adience_256x256_resnet50_imagenet_noisy_dldl_v2/data_split1.csv", header=None)

In [8]:
# Display df1 to inspect the loaded split.
df1

Unnamed: 0,0,1,2,3
0,0,facebase/data/Adience_256x256_resnet50_imagene...,0,4
1,1,facebase/data/Adience_256x256_resnet50_imagene...,0,4
2,2,facebase/data/Adience_256x256_resnet50_imagene...,0,3
3,3,facebase/data/Adience_256x256_resnet50_imagene...,0,6
4,4,facebase/data/Adience_256x256_resnet50_imagene...,0,4
...,...,...,...,...
17697,17697,facebase/data/Adience_256x256_resnet50_imagene...,0,4
17698,17698,facebase/data/Adience_256x256_resnet50_imagene...,0,7
17699,17699,facebase/data/Adience_256x256_resnet50_imagene...,0,4
17700,17700,facebase/data/Adience_256x256_resnet50_imagene...,0,4


In [4]:
# Keep all but the last two columns of each frame.
# NOTE: each name is rebound from itself, so re-running this cell drops two
# more columns every time; reload the frames before running it again.
df1 = df1.iloc[:, :-2]
df2 = df2.iloc[:, :-2]


In [9]:
# Check whether df1 and df2 hold the same data.
df1.equals(df2)

False

In [14]:
# Rewrite split 4 in place: copy column 3 into column 4, set column 5 to the
# constant 0.75, then save over the same file without index or header.
# NOTE(review): the meaning of columns 4 and 5 is not visible here — confirm
# against the code that consumes this file.
path = "facebase/data/Adience_256x256_resnet50_imagenet_dldl_v2/data_split4.csv"
df = pd.read_csv(path, header=None)
df[4] = df[3]
df[5] = 0.75

df.to_csv(path, index=False, header=False)

### Create 6-Class Version

In [2]:
import json
import argparse

def filter_json_by_age(input_path, output_path, min_age=0, max_age=5):
    """
    Keep only the JSON records whose "age" lies within an inclusive range.

    The input file is expected to contain a JSON array of objects. Records
    without an "age" key, or whose "age" is not a number (for example null
    or a string), are dropped rather than raising a TypeError during the
    range comparison.

    Parameters
    ----------
    input_path : str
        Path to the JSON file to read.
    output_path : str
        Path where the filtered JSON array is written (indented by 4 spaces).
    min_age : int, optional
        Smallest "age" value to keep, by default 0.
    max_age : int, optional
        Largest "age" value to keep, by default 5.
    """
    # Load JSON data from file
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    def _age_in_range(item):
        # Missing keys yield None, which fails the numeric check just like
        # JSON null or string ages do, so no sentinel default is needed.
        age = item.get("age")
        return isinstance(age, (int, float)) and min_age <= age <= max_age

    # Filter elements whose age lies in [min_age, max_age]
    filtered_data = [item for item in data if _age_in_range(item)]

    # Save filtered data to output file
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(filtered_data, f, indent=4)

    print(f"Filtered JSON saved to {output_path}")

# Write the 6-class version of the Adience JSON: only entries whose "age"
# is between 0 and 5 are kept.
filter_json_by_age("../facebase/benchmarks/databases/Adience.json",
                   "../facebase/benchmarks/databases/Adience_6c.json")

Filtered JSON saved to ../facebase/benchmarks/databases/Adience_6c.json
