In [1]:
import pandas as pd
import random

# Loc-Aware & Loc-Agnostic Captions

In [2]:
tradition_df = pd.read_csv('combined_seavqa_output.csv').loc[:, ['source','model','image_description','image_url','caption']].dropna()
cuisine_df = pd.read_csv('combined_worldcuisine_output.csv').loc[:, ['source','model','image_description','image_url','caption']].dropna()

tradition_df['source'] = 'SEA-VQA'
cuisine_df['source'] = 'WorldCuisine'

tradition_df['correctness'] = None
tradition_df['naturalness'] = None
cuisine_df['correctness'] = None
cuisine_df['naturalness'] = None

loc_agnostic_df = pd.concat([tradition_df, cuisine_df])
loc_agnostic_df['type'] = 'loc-agnostic'

In [3]:
tradition_loc_df = pd.read_csv('seafiltered_location-aware_prompting_combined_output.csv').dropna()
cuisine_loc_df = pd.read_csv('worldcuisine_location-aware_prompting_combined_output.csv').dropna()

tradition_loc_df = tradition_loc_df.loc[:, ['Source', 'Model ID', 'culture_desc', 'Image', 'Caption']]
tradition_loc_df.columns = ['source', 'model', 'image_description', 'image_url', 'caption']
cuisine_loc_df = cuisine_loc_df.loc[:, ['source', 'model', 'image_description', 'image_url', 'caption']]

tradition_loc_df['source'] = 'SEA-VQA'
cuisine_loc_df['source'] = 'WorldCuisine'

tradition_loc_df['correctness'] = None
tradition_loc_df['naturalness'] = None
cuisine_loc_df['correctness'] = None
cuisine_loc_df['naturalness'] = None

loc_aware_df = pd.concat([tradition_loc_df, cuisine_loc_df])
loc_aware_df['type'] = 'loc-aware'

In [4]:
url_counts = loc_aware_df.groupby('image_url').size()
loc_agnostic_df = loc_agnostic_df.loc[loc_agnostic_df['image_url'].isin(url_counts[url_counts == 4].index),:]
loc_aware_df = loc_aware_df.loc[loc_aware_df['image_url'].isin(url_counts[url_counts == 4].index),:]
loc_agnostic_df.shape, loc_aware_df.shape

((5332, 8), (5332, 8))

In [5]:
image_urls = loc_aware_df['image_url'].sample(200, random_state=14045).tolist()
food_urls, tradition_urls = [], []
for img_url in list(set(image_urls)):
    if 'unesco' in img_url:
        tradition_urls.append(img_url)
    else:
        food_urls.append(img_url)

random.seed(14042)
image_urls = tradition_urls[:25] + food_urls[:25]
random.shuffle(image_urls)

In [6]:
filt_loc_agnostic_df = loc_agnostic_df.loc[loc_agnostic_df['image_url'].isin(image_urls),:]
filt_loc_aware_df = loc_aware_df.loc[loc_aware_df['image_url'].isin(image_urls),:]

filt_loc_agnostic_df.shape, filt_loc_aware_df.shape

((200, 8), (200, 8))

In [7]:
# Standardize Image Description
filt_loc_aware_df.loc[:,'image_description'] = filt_loc_aware_df.loc[:,'image_url'].apply(
    lambda x: filt_loc_agnostic_df.loc[filt_loc_agnostic_df['image_url'] == x, 'image_description'].values[0]
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[selected_item_labels] = value


# Create Human Caption

In [8]:
tradition_loc_df = pd.read_csv('seafiltered_location-aware_prompting_combined_output.csv').dropna()
cuisine_df = pd.read_csv('combined_worldcuisine_output.csv').dropna()

In [9]:
tradition_human_df = tradition_loc_df.loc[tradition_loc_df['Image'].isin(image_urls), ['Image', 'gt_caption']].groupby('Image').head(1)
tradition_human_df['caption'] = tradition_human_df['gt_caption']
tradition_human_df = tradition_human_df[['Image', 'caption']].rename({'Image': 'image_url'}, axis='columns')
tradition_human_df['source'] = 'SEA-VQA'
tradition_human_df['model'] = 'Human'
tradition_human_df.loc[:,'image_description'] = tradition_human_df.loc[:,'image_url'].apply(
    lambda x: filt_loc_agnostic_df.loc[filt_loc_agnostic_df['image_url'] == x, 'image_description'].values[0]
)
tradition_human_df['correctness'] = None
tradition_human_df['naturalness'] = None

In [10]:
cuisine_human_df = cuisine_df.loc[cuisine_df['image_url'].isin(image_urls), ['image_url', 'cuisine_name', 'image_description']].groupby('image_url').head(1)
cuisine_human_df['caption'] = cuisine_df.apply(lambda x: f'This is a picture of {x["cuisine_name"]}. {x["image_description"]}.', axis='columns')
cuisine_human_df = cuisine_human_df[['image_url', 'caption']]
cuisine_human_df['source'] = 'WorldCuisine'
cuisine_human_df['model'] = 'Human'
cuisine_human_df.loc[:,'image_description'] = cuisine_human_df.loc[:,'image_url'].apply(
    lambda x: filt_loc_agnostic_df.loc[filt_loc_agnostic_df['image_url'] == x, 'image_description'].values[0]
)
cuisine_human_df['correctness'] = None
cuisine_human_df['naturalness'] = None

In [11]:
human_df = pd.concat([tradition_human_df, cuisine_human_df])
human_df['type'] = 'human'

In [None]:
tradition_loc_df['culture_name'].unique()

array(['Jamu wellness culture', 'Gamelan', 'Pantun',
       'Traditions of Pencak Silat',
       'Pinisi, art of boatbuilding in South Sulawesi',
       'Three genres of traditional dance in Bali',
       'Noken multifunctional knotted or woven bag, handcraft of the people of Papua',
       'Saman dance', 'Indonesian Angklung',
       'Education and training in Indonesian Batik intangible cultural heritage for elementary, junior, senior, vocational school and polytechnic students, in collaboration with the Batik Museum in Pekalongan',
       'Indonesian Kris',
       'Kun Lbokator, traditional martial arts in Cambodia',
       'Lkhon Khol Wat Svay Andet', 'Chapei Dang Veng',
       'Tugging rituals and games', 'Royal ballet of Cambodia',
       'Sbek Thom, Khmer shadow theatre',
       'Traditional craft of Naga motif weaving in Lao communities',
       'Khaen music of the Lao people', 'Mek Mulung', 'Songket',
       'Ong Chun/Wangchuan/Wangkang ceremony, rituals and related practices 

In [12]:
cuisine_df

Unnamed: 0,source,model,category,cuisines,cuisine_name,image_url,caption,image_description
0,Worldcuisine,paligemma2-10b-ft-docci-448,Food,Indonesian,Rawon,https://upload.wikimedia.org/wikipedia/commons...,A close-up view of a brown plate with a brown ...,The soup is composed of a ground mixture of ga...
1,Worldcuisine,paligemma2-10b-ft-docci-448,Food,Indonesian,Rawon,https://upload.wikimedia.org/wikipedia/commons...,The image is of a bowl of food with a dark bro...,The soup is composed of a ground mixture of ga...
2,Worldcuisine,paligemma2-10b-ft-docci-448,Food,Indonesian,Rawon,https://upload.wikimedia.org/wikipedia/commons...,A close-up view of a white plate with a black ...,The soup is composed of a ground mixture of ga...
3,Worldcuisine,paligemma2-10b-ft-docci-448,Food,Indonesian,Rawon,https://upload.wikimedia.org/wikipedia/commons...,The image is of a white plate with a dark brow...,The soup is composed of a ground mixture of ga...
4,Worldcuisine,paligemma2-10b-ft-docci-448,Food,Indonesian,Rawon,https://upload.wikimedia.org/wikipedia/commons...,The image is a close-up view of a brown plate ...,The soup is composed of a ground mixture of ga...
...,...,...,...,...,...,...,...,...
3711,Worldcuisine,qwen2-VL-7B-Instruct,Food,"Malaysian, Singaporean, Indonesian",Tee long pan,https://upload.wikimedia.org/wikipedia/commons...,Indulging in a traditional Southeast Asian del...,a steamed rice roll serve with shrimp paste or...
3712,Worldcuisine,qwen2-VL-7B-Instruct,Food,Vietnamese,Bánh bò,https://upload.wikimedia.org/wikipedia/commons...,Experience the vibrant flavors of Southeast As...,"The dish is a chewy cake made of rice flour, w..."
3713,Worldcuisine,qwen2-VL-7B-Instruct,Food,Indonesian,Milk pie,https://upload.wikimedia.org/wikipedia/commons...,Indulge in the sweet delights of Southeast Asi...,Custard tart pastry consisting of a shortcrust...
3714,Worldcuisine,qwen2-VL-7B-Instruct,Food,Indonesian,Milk pie,https://upload.wikimedia.org/wikipedia/commons...,Indulge in the delightful flavors of Southeast...,Custard tart pastry consisting of a shortcrust...


# Merge & Reshuffle Caption Data

In [12]:
annot_df = pd.concat([filt_loc_agnostic_df, filt_loc_aware_df, human_df])[
    ['source', 'type', 'model', 'image_description', 'image_url', 'caption', 'correctness', 'naturalness']
]
annot_df = annot_df.sample(annot_df.shape[0], random_state=12345).reset_index(drop=True)
annot_df['image_url'] = annot_df['image_url'].apply(lambda x: f'=IMAGE("{x}")')

In [14]:
annot_df.to_csv('annot_image_captioning.csv', index=False)

In [10]:
tradition_df = pd.read_csv('combined_seavqa_output.csv')
cuisine_df = pd.read_csv('combined_worldcuisine_output.csv')

In [11]:
tradition_df[['image_description','culture_name']].drop_duplicates().to_csv('culture_metadata.csv', index=False)

In [12]:
cuisine_df[['image_description', 'cuisine_name']].drop_duplicates().to_csv('cuisine_metadata.csv', index=False)

# Create Human Data for Image Gen

In [35]:
tradition_loc_df = pd.read_csv('seafiltered_location-aware_prompting_combined_output.csv').dropna()
cuisine_df = pd.read_csv('combined_worldcuisine_output.csv').dropna()

In [40]:
# Create Human Data for 
tradition_loc_df['caption'] = tradition_loc_df['culture_name'].apply(lambda x: f'An image of people doing {x}')
tradition_loc_df['type'] = 'culture'
tradition_loc_df['image'] = tradition_loc_df['Image']
tradition_loc_df['model'] = 'Human'
tradition_loc_df.loc[
    :, ['type','model','caption','image']
].groupby('image').head(1).to_csv('image_gen_culture_he.csv', index=False)

In [42]:
cuisine_df['caption'] = cuisine_df['cuisine_name'].apply(lambda x: f'An image of people eating {x}')
cuisine_df['type'] = 'worldcuisine'
cuisine_df['image'] = cuisine_df['image_url']
cuisine_df['model'] = 'human'
cuisine_df.loc[
    :, ['type','model','caption','image']
].groupby('caption').head(1).sample(50, random_state=14045).to_csv('image_gen_cuisine_he.csv', index=False)