# Load annotated data

In [1]:
import json
import pprint 
pp = pprint.PrettyPrinter(indent=4)
import numpy as np
import pandas as pd

# Dataset to evaluate: uncomment exactly one of the assignments below.
# dataset_name = 'meme_retrieval_data'
dataset_name = 'figmemes'
# dataset_name = 'memecap'

# Maps each dataset name to (predictions JSON, ground-truth TSV).
# An empty ground-truth path means no annotation file is configured for it.
dataset_files = {
    'meme_retrieval_data': (
        '../data/meme_retrieval_data/filtered_meme_configs_5_attributes_meme_retri.json',
        '',
    ),
    'figmemes': (
        '../data/figmemes/filtered_meme_configs_5_attributes_figmemes.json',
        '../data/figmemes/figmemes_annotations.tsv',
    ),
    'memecap': (
        '../data/memecap/filtered_meme_configs_5_attributes_memecap.json',
        '',
    ),
}
if dataset_name not in dataset_files:
    # A typo in dataset_name would otherwise surface as a failure to open ''.
    raise ValueError(
        f'Unknown dataset_name {dataset_name!r}; expected one of {sorted(dataset_files)}'
    )
img_prompt_respond_file, gt_data_file = dataset_files[dataset_name]

# load predicted data
with open(img_prompt_respond_file, 'r', encoding='utf-8') as json_file:
    meme_configs = json.load(json_file)

# load ground truth data
# Predictions are loaded first so they stay available even when the dataset
# has no annotation file; in that case fail with an explicit message rather
# than letting read_csv choke on an empty path.
if not gt_data_file:
    raise ValueError(
        f'No ground-truth annotation file is configured for dataset {dataset_name!r}'
    )
gt_data = pd.read_csv(gt_data_file, sep='\t', index_col='img_name')

In [2]:
gt_data

Unnamed: 0_level_0,allusion,exaggeration,irony,anthrop,metaphor,contrast,year,arts,real,mixed,infograph
img_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
01144951214.png,0,1,0,0,0,0,2017,0,0,1,0
02004371131.jpg,1,0,0,0,1,0,2017,1,0,0,0
07641571981.jpg,0,1,0,0,0,0,2021,0,1,0,0
11387899863.png,0,0,0,0,1,0,2018,0,1,0,0
11718198987.jpg,0,1,0,0,0,0,2017,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
1638163727373.jpg,0,0,0,0,1,0,2021,1,0,0,0
1564893951723.jpg,0,0,0,0,0,0,2019,1,0,0,0
1614835018272.png,0,0,0,1,0,0,2021,1,0,0,0
1565502448498.png,0,0,0,0,1,0,2019,1,0,0,0


In [3]:
# Sanity check on one row: the positional slice [:6] keeps the first six
# columns (allusion ... contrast); the remaining columns (year, arts, real,
# mixed, infograph) are not used for the label vector.
gt_data.loc['01144951214.png'][:6].tolist()

[0, 1, 0, 0, 0, 0]

In [4]:
def label2vec(label: str):
    """One-hot encode a literary-device label.

    Args:
        label: One of 'allusion', 'exaggeration', 'irony', 'anthrop',
            'metaphor' or 'contrast'.

    Returns:
        A float array of shape (1, 6) that is 1 at the position of `label`
        in the list above and 0 everywhere else.

    Raises:
        ValueError: If `label` is not one of the six known labels.
    """
    labels = ['allusion', 'exaggeration', 'irony', 'anthrop', 'metaphor', 'contrast']
    position = labels.index(label)
    # A 1 x n "identity" shifted by k places the single 1 of its only row in column k.
    return np.eye(1, len(labels), k=position)

label2vec('exaggeration')

array([[0., 1., 0., 0., 0., 0.]])

In [5]:
meme_configs[0]

{'image_dir': './data/figmemes/images/1581691318761.png',
 'visual elaboration': 'The image features a manga-style illustration with two male characters. The elder one has an affectionate expression and is hugging a child. Another adult male, wearing an apron, stands in the background with crossed arms, looking at the pair. There is also a speech bubble saying "KIDS ARE LUCKY. THEY DON\'T HAVE TO THINK ABOUT THAT STUFF."',
 'detected text': '"My Brother\'s Husband Is the Hottest Family-Friendly Manga Ever Made" by Chase Burns · Feb 11, 2020 at 100 pm.  "KIDS ARE LUCKY. THEY DON\'T HAVE TO THINK ABOUT THAT STUFF."',
 'meaning of the meme': 'The meme humorously implies that, despite being a family-friendly manga, "My Brother\'s Husband" contains themes that adults find deeper or more complicated, which children might not fully grasp, thus making them "lucky."',
 'literary device': 'irony',
 'emotion word': 'none'}

In [6]:
# Literary-device columns of the annotation table, listed in the same order
# label2vec uses for its one-hot positions.
label_columns = ['allusion', 'exaggeration', 'irony', 'anthrop', 'metaphor', 'contrast']

# The annotation table is indexed by bare file name, so drop the directory part.
img_names = [meme_conf['image_dir'].split('/')[-1] for meme_conf in meme_configs]

# Each label2vec call yields a (1, n_labels) row. reshape(-1, n_labels) keeps the
# result 2-D even for zero or one meme, where a bare squeeze() would also
# drop the sample axis.
predictions = np.array(
    [label2vec(meme_conf['literary device']) for meme_conf in meme_configs]
).reshape(-1, len(label_columns))

# Select ground truth by column name (not by position) in a single lookup, so
# the result does not depend on the column order of the TSV file.
gt = gt_data.loc[img_names, label_columns].to_numpy()

# Duplicate img_name entries in the annotation table would yield extra rows here.
assert predictions.shape == gt.shape, (predictions.shape, gt.shape)
predictions.shape, gt.shape

((1407, 6), (1407, 6))

In [7]:
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report

# Flatten both matrices so that every (meme, label) cell is scored as one
# independent binary decision; class 0 ("label absent") is therefore part of
# these scores.
flat_gt, flat_pred = gt.flatten(), predictions.flatten()
for averaging in ('macro', 'micro'):
    print(f'{averaging}:', f1_score(flat_gt, flat_pred, average=averaging))
print(classification_report(flat_gt, flat_pred))

macro: 0.58906654587494
micro: 0.7782515991471216
              precision    recall  f1-score   support

           0       0.87      0.86      0.87      7135
           1       0.30      0.32      0.31      1307

    accuracy                           0.78      8442
   macro avg       0.59      0.59      0.59      8442
weighted avg       0.79      0.78      0.78      8442



In [8]:
def gimme_f1s(y_true, y_pred):
    """Print classification reports and F1 scores under both zero_division policies.

    Output order: the classification report with zero_division=0 (headed
    'zero'), the report with zero_division=1 (headed 'one'), a blank line,
    then for each policy the micro, macro, weighted and samples F1 scores,
    each multiplied by 100 and followed by a blank line.

    Args:
        y_true: Ground-truth labels, passed unchanged to scikit-learn.
        y_pred: Predicted labels, passed unchanged to scikit-learn.

    Returns:
        None. Everything is written to stdout.
    """
    policies = (('zero', 0), ('one', 1))
    averages = ['micro', 'macro', 'weighted', 'samples']

    # Full per-class reports, one per zero_division policy.
    for heading, fill_value in policies:
        print(heading)
        print(classification_report(y_true=y_true, y_pred=y_pred, zero_division=fill_value))
    print()

    # Aggregate F1 scores as percentages, one group per policy.
    for heading, fill_value in policies:
        print(heading)
        for average in averages:
            print(average)
            print(f1_score(y_true=y_true, y_pred=y_pred, zero_division=fill_value, average=average) * 100)
            print()

gimme_f1s(gt, predictions)

zero
              precision    recall  f1-score   support

           0       0.40      0.19      0.26       247
           1       0.28      0.31      0.30       235
           2       0.28      0.69      0.40       289
           3       0.40      0.02      0.03       115
           4       0.63      0.06      0.12       262
           5       0.27      0.53      0.36       159

   micro avg       0.30      0.32      0.31      1307
   macro avg       0.38      0.30      0.24      1307
weighted avg       0.38      0.32      0.26      1307
 samples avg       0.30      0.23      0.25      1307

one
              precision    recall  f1-score   support

           0       0.40      0.19      0.26       247
           1       0.28      0.31      0.30       235
           2       0.28      0.69      0.40       289
           3       0.40      0.02      0.03       115
           4       0.63      0.06      0.12       262
           5       0.27      0.53      0.36       159

   micro avg  

# Only consider the samples that have ground truth labels and only consider one label
Because GPT-4o was forced to choose a label

In [None]:
# non_zero_idx = gt.sum(axis=1) != 0
# gt[non_zero_idx, :].shape, predictions[non_zero_idx, :].shape
# gimme_f1s(gt[non_zero_idx, :], predictions[non_zero_idx, :])