## Evaluating GPT Models

In [None]:
!pip install openai

In [4]:
import base64
import sys
import os
import csv
from openai import OpenAI
import matplotlib.pyplot as plt

IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    from google.colab import userdata
    OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
else:
    import os
    from dotenv import load_dotenv

    load_dotenv()
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

client = OpenAI(api_key=OPENAI_API_KEY)

In [5]:
# Function to encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

## Individual Evaluation

In [None]:
# Path to your image
image_path = "images/synthesized_images/Azerbaijan/Black/Azerbaijan_clothes_black_0.png"

# Getting the base64 string
base64_image = encode_image(image_path)

response = client.chat.completions.create(
  model="gpt-4o",
  messages=[
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "Which country is the clothing in the photo mostly associated with? Which visual cues helped you make this determination?",
        },
        {
          "type": "image_url",
          "image_url": {
            "url":  f"data:image/jpeg;base64,{base64_image}"
          },
        },
      ],
    }
  ],
)


image = plt.imread(image_path)
plt.imshow(image)
plt.axis('off') 
plt.show()

print(f"Filename: {os.path.basename(image_path)}")
print(response.choices[0].message.content)
print("=" * 50)

## Batch Evaluation

In [4]:
# Function to create a request for each image
def process_images_in_batch(image_folder, client, max_images=20):
    # List all image files in the directory
    image_files = [f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
    num_images_processed = 0
    responses = []
    
    for image_file in image_files:
        if num_images_processed >= max_images:
            break
        num_images_processed += 1
        
        image_path = os.path.join(image_folder, image_file)
        base64_image = encode_image(image_path)
        
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "Which country is the clothing in the photo mostly associated with? Which visual cues helped you make this determination?",                                
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                },
                            },
                        ],
                    }
                ],
            )
        except Exception as e:
            print(f"Error processing image {image_file}: {e}")
            continue
        
        responses.append({
            "image_file": image_file,
            "response": response.choices[0]
        })
    
    return responses

csv_file_path = "responses_original.csv"

def print_save_responses(responses, image_folder, save=True):         
    # Output the results
    data_to_save = []
    
    for response in responses:
        image_path = os.path.join(image_folder, response['image_file'])
        file_name = os.path.basename(image_path)
        message_content = response['response'].message.content

        # Append data for each response
        if save: 
            data_to_save.append({
                "image_file": file_name,
                "response": message_content
            })
        
        # Display the image
        # image = plt.imread(image_path)
        # plt.imshow(image)
        # plt.axis('off') 
        # plt.show()

        # Print the formatted message and file name
        print(f"Filename: {file_name}")
        print(message_content)
        print("=" * 50)
    
    if save:
        with open(csv_file_path, mode='a', newline='', encoding='utf-8') as csvfile:
            csv_writer = csv.writer(csvfile)
            # Write header
            # csv_writer.writerow(['Original Country', 'Synthesized Race', 'Image File Name', 'Content'])

            # Write each response
            for response in data_to_save:
                csv_writer.writerow(['Myanmar', 'White', response['image_file'], response['response']])

            print(f"Data saved to: {csv_file_path}")


image_folder = "images/synthesized_images/Myanmar/clothes/White"
responses = process_images_in_batch(image_folder, client)
print_save_responses(responses, image_folder, save=True)

Filename: edited_myanmar_clothes_8_100.png
The clothing in the photo is mostly associated with Myanmar. The visual cues include:

1. The longyi, which is the traditional skirt-like garment worn by both men and women in Myanmar.
2. The patterned design, which is common in Burmese textiles.
3. The traditional style of hair and accessories, which aligns with Burmese cultural attire.

These elements help identify the clothing as being from Myanmar.
Filename: edited_myanmar_clothes_2_1439884212.png
The clothing in the photo is associated with Russia, specifically traditional costumes that are representative of various ethnic groups within the country. The visual cues include the distinctive patterns, colors, and styles typical of Russian folk attire, such as layered clothing, decorative embroidery, and unique hats or headpieces. Each outfit captures the cultural diversity found in different regions of Russia.
Filename: edited_myanmar_clothes_3_1439884212.png
The clothing in the photo is mos

### Renaming Files (If needed)

In [None]:
import pandas as pd
import re

In [None]:
# rename all the files in the folder
image_folder = 'images/synthesized_images/Azerbaijan/Indian'

for i, filename in enumerate(os.listdir(image_folder)):
    # check if the file name ends with .png
    if filename.endswith('.png'):
        os.rename(os.path.join(image_folder, filename), os.path.join(image_folder, f"Azerbaijan_clothes_indian_{i}.png"))

In [None]:
def extract_seed_number(filename):
    """Extract the seed number from the filename."""
    match = re.search(r'_(\d+)\.png$', filename)
    return match.group(1) if match else None

def extract_base_identifier(filename):
    """Extract the base identifier from the filename (everything before the seed number)."""
    return re.match(r'(.*?)_\d+\.png$', filename).group(1)

def create_standardized_name(row, index_map):
    """Create a standardized name based on the specified format."""
    # Get the unique index for this base identifier within its country
    base_id = extract_base_identifier(row['old_file_name'])
    country_base_map = index_map[row['original_country']]
    index = country_base_map[base_id]
    
    return f"{row['original_country']}_{row['synthesized_race']}_{index}_{extract_seed_number(row['old_file_name'])}"

def standardize_filenames(data):
    """Process the data and add standardized filenames."""
    # Create a nested mapping of country -> base identifiers -> indices
    index_map = {}
    for country in data['original_country'].unique():
        # Filter data for this country
        country_data = data[data['original_country'] == country]
        # Get unique base identifiers for this country
        base_identifiers = [extract_base_identifier(fname) 
                          for fname in country_data['old_file_name'].unique()]
        # Create index mapping starting from 0 for each country
        index_map[country] = {identifier: i 
                            for i, identifier in enumerate(sorted(set(base_identifiers)))}
    
    # Add new column with standardized names
    data.insert(
        data.columns.get_loc('old_file_name') + 1,  # Insert after old_file_name
        'standardized_name',
        data.apply(lambda row: create_standardized_name(row, index_map) + '.png', axis=1)
    )
    
    return data

# Read the data
data = pd.read_csv('responses/responses_synthesized.csv')

# Process the data
processed_data = standardize_filenames(data)

# Display examples grouped by country to show index reset
print("\nExample of standardization:")
for country in processed_data['original_country'].unique():
    print(f"\n{country} examples:")
    print(processed_data[processed_data['original_country'] == country]
          [['original_country', 'old_file_name', 'standardized_name']].head(3))

# Save the processed data
processed_data.to_csv('processed_data_new.csv', index=False)
print("\nProcessed data saved to 'processed_data.csv'")


Example of standardization:

Korean examples:
  original_country                                      old_file_name  \
0           Korean  edited_1ae851cc084484321f3d5409093711f2_205568...   
1           Korean  edited_4-colors-limited-offer-woman-elegant-1_...   
2           Korean  edited_6f0d75dc7aa816dde4e847b29e197d15_205568...   

               standardized_name  
0  Korean_Black_0_2055686324.png  
1  Korean_Black_2_2055686324.png  
2  Korean_Black_7_2055686324.png  

Myanmar examples:
   original_country                             old_file_name  \
40          Myanmar   edited_myanmar_clothes_7_2766869188.png   
41          Myanmar   edited_myanmar_clothes_9_2766869188.png   
42          Myanmar  edited_myanmar_clothes_10_2766869188.png   

                 standardized_name  
40  Myanmar_Asian_7_2766869188.png  
41  Myanmar_Asian_9_2766869188.png  
42  Myanmar_Asian_1_2766869188.png  

UK examples:
   original_country                                      old_file_name  \
78  