# Utility Functions (for organizing data)

### Add acceptable names for UK and US

In [9]:
import pandas as pd

# Stamp one 'original_country' value onto every row of a responses CSV
def add_labels(csv_file_path, acceptable_names):
    """Set the 'original_country' column of a CSV file and save it in place.

    Args:
        csv_file_path: Path of the CSV file; it is read and then overwritten.
        acceptable_names: Value stored in the 'original_country' column. A
            scalar such as a comma-separated string is repeated on every row.
    """
    labelled = pd.read_csv(csv_file_path).assign(original_country=acceptable_names)
    labelled.to_csv(csv_file_path, index=False)

    message = f"The 'original_country' column has been updated in {csv_file_path}."
    print(message)


# Comma-separated names that add_labels stores in the 'original_country'
# column of every row. The UK list is active; the US list is kept commented
# out so the two can be swapped by hand.
acceptable_names = "UK,United Kingdom,Scotland,Britain,British,Irish,Wales,England,English"
# acceptable_names = "USA,US,the United States of America,the United States,Hawaii,American"
# NOTE(review): the active list is the UK one, but the loop below writes it
# into the US_Food_Results.csv files — confirm which list is intended before
# running this cell.
for model in ['gpt4o', 'glm4v', 'internVL']: 
    add_labels(f"responses/{model}/US_Food_Results.csv", acceptable_names)
# add_labels(f"responses/glm4v/UK_Festival_Results_2.csv", acceptable_names)


The 'original_country' column has been updated in responses/glm4v/UK_Festival_Results_2.csv.



### Add Food Label Column

Using the country name and image index parsed from the **csv file of food labels**, this assigns the matching label to each image file listed in a given **csv file of responses**. It is therefore important that an original image and its corresponding synthesized images share the same index.

- E.g., the responses row `Myanmar,Asian,Myanmar_Asian_food_1.png,"The food in the photo is..."` is matched to the row `Myanmar_food_1.png,"Burmese Curry"` in `food_label.csv`.

In [6]:
import pandas as pd

def match_food_label(food_label_csv, responses_csv):
    """Attach a food label to every row of a responses CSV, overwriting that CSV.

    Rows are paired on two keys parsed from the file names: the run of ASCII
    letters before the first underscore (the country) and the first run of
    digits (the image number). For example 'Myanmar_Asian_food_1.png' in the
    responses file pairs with 'Myanmar_food_1.png' in the label file.

    Only the two keys and 'label' are taken from the label file, so any other
    column it carries cannot clash with the response columns. Label rows whose
    file name yields no key are ignored, and when several label rows share the
    same key the first one wins, so the number of response rows never changes.

    Args:
        food_label_csv: Path to a CSV with 'image_file' and 'label' columns.
        responses_csv: Path to a CSV with 'original_country',
            'synthesized_race', 'image_file_name' and 'response' columns. It
            is rewritten in place with exactly those columns plus 'label'
            (placed right after 'image_file_name'); a pre-existing 'label'
            column is replaced and any other column is dropped. Rows without
            a matching label get an empty 'label'.

    Raises:
        KeyError: If one of the columns named above is missing.
    """
    country_pattern = r"^([A-Za-z]+)_"
    number_pattern = r"(\d+)"
    key_cols = ["country", "image_number"]

    def with_keys(frame, file_col):
        # Return a copy of `frame` carrying the two merge keys.
        names = frame[file_col]
        return frame.assign(
            country=names.str.extract(country_pattern, expand=False),
            image_number=names.str.extract(number_pattern, expand=False),
        )

    labels_df = pd.read_csv(food_label_csv)
    details_df = pd.read_csv(responses_csv)

    # Keep only what the merge needs. NaN keys are dropped because pandas
    # matches NaN keys with each other; duplicate keys are collapsed so the
    # left merge cannot multiply response rows.
    label_lookup = (
        with_keys(labels_df, "image_file")[key_cols + ["label"]]
        .dropna(subset=key_cols)
        .drop_duplicates(subset=key_cols)
    )

    # Any stale 'label' column is discarded so the merged one is the only one.
    responses_keyed = with_keys(
        details_df.drop(columns=["label"], errors="ignore"),
        "image_file_name",
    )

    merged_df = responses_keyed.merge(label_lookup, on=key_cols, how="left")

    # Fixed output layout: 'label' sits right after 'image_file_name'; the
    # auxiliary key columns are left out by this selection.
    columns_order = ["original_country", "synthesized_race", "image_file_name", "label", "response"]

    # Overwrite the responses file with the labelled rows.
    merged_df[columns_order].to_csv(responses_csv, index=False)

# Label file shared by all models
food_label_csv = "images/food_label.csv"

# Label each model's US food responses; every responses CSV is overwritten in place
for model in ['gpt4o', 'glm4v', 'internVL']: 
    responses_csv = f"responses/{model}/US_Food_Results.csv"
    match_food_label(food_label_csv, responses_csv)

### Converting race names

In [4]:
import os
import pandas as pd

# Old race name -> new race name, applied to CSV values and to file names
race_map = {
    'Asian': 'EastAsian',
    'Black': 'African',
    'Indian': 'SouthAsian',
    'White': 'Caucasian',
}

def convert_race_names(parent_dir, models=('internVL', 'gpt4o', 'glm4v'), mapping=None):
    """Rename race labels inside every response CSV, overwriting each file.

    For each ``<parent_dir>/<model>/*.csv`` file:
      * values of 'synthesized_race' that equal an old name are replaced by
        the new name;
      * in 'image_file_name', every '_<old>_' substring becomes '_<new>_'.
        The substitution is literal (no regular expressions), so names with
        special characters are handled as plain text.

    Args:
        parent_dir: Directory holding one sub-directory per model.
        models: Names of the model sub-directories to process. Defaults to the
            three models used in this notebook.
        mapping: Dict of old name -> new name. Defaults to the module-level
            ``race_map``.

    Raises:
        FileNotFoundError: If a model sub-directory does not exist.
        KeyError: If a CSV lacks 'synthesized_race' or 'image_file_name'.
    """
    if mapping is None:
        mapping = race_map

    for model in models:
        model_dir = os.path.join(parent_dir, model)
        for filename in os.listdir(model_dir):
            if not filename.endswith('.csv'):
                continue

            csv_file_path = os.path.join(model_dir, filename)
            df = pd.read_csv(csv_file_path)

            # Whole-value replacement in the race column
            df['synthesized_race'] = df['synthesized_race'].replace(mapping)

            # Substring replacement in the file names; the surrounding
            # underscores keep e.g. 'EastAsian' from matching 'Asian' again.
            file_names = df['image_file_name']
            for old_race, new_race in mapping.items():
                file_names = file_names.str.replace(f"_{old_race}_", f"_{new_race}_", regex=False)
            df['image_file_name'] = file_names

            df.to_csv(csv_file_path, index=False)

# Rewrite, in place, every CSV found under responses/<model>/
convert_race_names('responses')

In [3]:
import os

# Old race name -> new race name used for folder and file names
race_map = {
    'Asian': 'EastAsian',
    'Black': 'African',
    'Indian': 'SouthAsian',
    'White': 'Caucasian',
}

# Root of the image tree that is walked
base_dir = "images"

# Walk bottom-up (topdown=False): the contents of a directory are visited
# before the directory itself is renamed from its parent's iteration.
for root, dirs, files in os.walk(base_dir, topdown=False):
    # A directory is renamed only when its whole name is an old race name
    for dir_name in dirs:
        if dir_name not in race_map:
            continue
        source_dir = os.path.join(root, dir_name)
        target_dir = os.path.join(root, race_map[dir_name])
        os.rename(source_dir, target_dir)
        print(f"Renamed directory: {source_dir} -> {target_dir}")

    # A file is renamed for the first race (in race_map order) that appears
    # in its name as "_<race>_"; files matching no race are left alone
    for file_name in files:
        matched_race = next(
            (race for race in race_map if f"_{race}_" in file_name),
            None,
        )
        if matched_race is None:
            continue
        renamed = file_name.replace(f"_{matched_race}_", f"_{race_map[matched_race]}_")
        source_file = os.path.join(root, file_name)
        target_file = os.path.join(root, renamed)
        if source_file != target_file:
            os.rename(source_file, target_file)
            # print(f"Renamed file: {source_file} -> {target_file}")


Renamed directory: images/Azerbaijani_Festival/synthesized_images/Black -> images/Azerbaijani_Festival/synthesized_images/African
Renamed directory: images/Azerbaijani_Festival/synthesized_images/Indian -> images/Azerbaijani_Festival/synthesized_images/SouthAsian
Renamed directory: images/Azerbaijani_Festival/synthesized_images/Asian -> images/Azerbaijani_Festival/synthesized_images/EastAsian
Renamed directory: images/US_Festival/synthesized_images/Black -> images/US_Festival/synthesized_images/African
Renamed directory: images/US_Festival/synthesized_images/Indian -> images/US_Festival/synthesized_images/SouthAsian
Renamed directory: images/US_Festival/synthesized_images/Asian -> images/US_Festival/synthesized_images/EastAsian
Renamed directory: images/US_Food/synthesized_images/Black -> images/US_Food/synthesized_images/African
Renamed directory: images/US_Food/synthesized_images/Indian -> images/US_Food/synthesized_images/SouthAsian
Renamed directory: images/US_Food/synthesized_imag

### Renaming files in a random index order

In [5]:
import os

def rename_files_to_azerbaijani_festival(directory, prefix="US_Festival"):
    """Renumber the images in ``directory`` as '<prefix>_0.png', '<prefix>_1.png', ...

    Only entries ending in .png/.jpg/.jpeg (any letter case) are renamed, and
    only those entries are counted, so the resulting indices are contiguous
    even when the folder holds other files. Indices follow the order returned
    by ``os.listdir``, which is arbitrary.

    Renaming is done in two passes through temporary names so that a file
    whose new name equals another file's current name does not overwrite it.

    NOTE: despite the function name, the default prefix is 'US_Festival'.
    Every file receives a '.png' extension; the file contents are not
    converted.

    Args:
        directory: Folder whose image files are renamed in place.
        prefix: Text placed before the index in each new file name.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    temp_suffix = "_temp"

    # Snapshot the images first so the counter only advances on real images.
    image_files = [
        name for name in os.listdir(directory)
        if name.lower().endswith(image_exts)
    ]

    # Pass 1: move every image to a temporary name to avoid overwrites
    staged = []
    for index, filename in enumerate(image_files):
        temp_name = f"{prefix}_{index}{temp_suffix}.png"
        final_name = f"{prefix}_{index}.png"
        os.rename(os.path.join(directory, filename), os.path.join(directory, temp_name))
        staged.append((temp_name, final_name))

    # Pass 2: move from the temporary names to the final names
    for temp_name, final_name in staged:
        os.rename(os.path.join(directory, temp_name), os.path.join(directory, final_name))

# Renumber the original US festival images in place. Despite its name, the
# function below writes the "US_Festival_" prefix.
directory = "images/US_Festival/original_images"
rename_files_to_azerbaijani_festival(directory)

### Decrement index of images in csv file

In [None]:
import pandas as pd

def decrement_image_index(csv_file_path, column=None):
    """Subtract 1 from the trailing index of every image file name in a CSV.

    A name such as 'Myanmar_food_3.png' becomes 'Myanmar_food_2.png': the
    index is the text between the last underscore and the first following
    dot. The CSV is overwritten in place, so running this twice decrements
    twice.

    Args:
        csv_file_path: Path of the CSV to update.
        column: Name of the column holding the file names. When omitted,
            'image_file_name' is used if present, otherwise 'image_file'
            (the column name the food-label CSV is read with elsewhere in
            this notebook).

    Raises:
        KeyError: If the requested column, or neither default column, exists.
        ValueError: If a file name's index part is not an integer.
    """
    df = pd.read_csv(csv_file_path)

    if column is None:
        candidates = ('image_file_name', 'image_file')
        column = next((name for name in candidates if name in df.columns), None)
        if column is None:
            raise KeyError(f"none of the columns {candidates} found in {csv_file_path}")

    # Function to decrement the index in the image file name
    def decrement_index(file_name):
        # Leave missing values (NaN) untouched instead of crashing on them.
        if not isinstance(file_name, str):
            return file_name
        head, underscore, tail = file_name.rpartition('_')
        # Split at the first dot only, so 'x_1.tar.gz' keeps its full
        # extension and a name without any extension is still accepted.
        index_part, dot, extension = tail.partition('.')
        new_index = str(int(index_part) - 1)
        return f"{head}{underscore}{new_index}{dot}{extension}"

    # Apply the function to the image file name column
    df[column] = df[column].apply(decrement_index)

    # Save the updated dataframe back to the CSV file
    df.to_csv(csv_file_path, index=False)

# NOTE(review): match_food_label reads this label file's names from an
# 'image_file' column, whereas the response CSVs use 'image_file_name' — make
# sure decrement_image_index targets the right column for this file.
# The file is overwritten in place; each run subtracts 1 again.
csv_file_path = "images/food_label.csv"
decrement_image_index(csv_file_path)

### Change file names in the folder in accordance with index

In [5]:
import os

def decrement_image_index_in_folder(folder_path, category, race):
    """Rename the images in a folder to 'Azerbaijani_<category>_<index>_mask.png'.

    The index is taken from the second-to-last underscore-separated part of
    each current file name and passed through ``int`` (so '03' becomes '3').
    Despite the function name, the index is NOT decremented. Only entries
    ending in .png/.jpg/.jpeg (any letter case) are considered; every result
    gets a '.png' extension and file contents are not converted.

    All new names are computed and checked before any file is touched, so a
    malformed name or a clash between two targets leaves the folder
    unchanged. Renaming then goes through temporary names so that a file
    whose new name equals another file's current name does not overwrite it.

    Args:
        folder_path: Folder whose image files are renamed in place.
        category: Text inserted after 'Azerbaijani_' in each new name.
        race: Unused; kept so existing calls keep working.

    Raises:
        IndexError: If a file name has fewer than two underscore-separated parts.
        ValueError: If the index part of a file name is not an integer.
        FileExistsError: If two files would receive the same new name.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    temp_suffix = "_temp"

    # Plan: (current name, temporary name, final name) for every image
    plan = []
    claimed = {}
    for filename in os.listdir(folder_path):
        if not filename.lower().endswith(image_exts):
            continue
        parts = filename.split('_')
        # index_part = parts[-1].split('.')[0]
        index_part = parts[-2]

        new_index = str(int(index_part))

        final_filename = f'Azerbaijani_{category}_{new_index}_mask.png'
        if final_filename in claimed:
            raise FileExistsError(
                f"{filename} and {claimed[final_filename]} would both be renamed to {final_filename}"
            )
        claimed[final_filename] = filename
        plan.append((filename, final_filename + temp_suffix, final_filename))

    # Pass 1: Rename to temporary files to avoid overwrites
    for filename, temp_filename, _ in plan:
        os.rename(os.path.join(folder_path, filename), os.path.join(folder_path, temp_filename))

    # Pass 2: Rename from temporary to final names
    for _, temp_filename, final_filename in plan:
        os.rename(os.path.join(folder_path, temp_filename), os.path.join(folder_path, final_filename))

# NOTE(review): `races` is only referenced by the commented-out loop below.
races = ['Asian', 'Black', 'Caucasian', 'Indian']
category = 'Food'

# Process the mask folder. The third argument ('original') is not read by
# decrement_image_index_in_folder.
folder_path = f"images/Azerbaijani_{category}/masks"
decrement_image_index_in_folder(folder_path, category, 'original')

# Disabled: the same call for each synthesized-image folder.
# for race in races:
#     folder_path = f"images/Azerbaijani_{category}/synthesized_images/{race}"
#     decrement_image_index_in_folder(folder_path, category, race)
