# Import libraries

In [None]:
!pip install einops timm sentencepiece

In [None]:
import inspect
import os
import re

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from transformers import AutoModel, AutoTokenizer
torch.set_grad_enabled(False)

# Instantiate model

In [None]:
# Load the InternLM-XComposer checkpoint and its tokenizer, following
# https://github.com/InternLM/InternLM-XComposer
# NOTE(review): trust_remote_code=True executes code shipped with the
# checkpoint; consider pinning a revision.
MODEL_ID = 'internlm/internlm-xcomposer-7b'

model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True)
model = model.cuda().eval()  # move to the GPU and switch to inference mode

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model.tokenizer = tokenizer

# Mount drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; every dataset and output path used
# below lives under /content/drive/MyDrive.
drive.mount('/content/drive')

# Create directory to store inferences

In [None]:
# Folder on the mounted Drive that receives the raw prediction CSVs.
# exist_ok=True makes the cell safe to re-run.
predictions_dir = '/content/drive/MyDrive/stance_detection_datasets/inferences/predictions'
os.makedirs(predictions_dir, exist_ok=True)

# Import datasets

## US-Politics

In [None]:
# US-Politics test split, one CSV per captioning model
# (Qwen-VL-Chat, InternLM-XComposer, llava-v1.5-13b, BLIP-2).
constraint22_dataset_uspolitics_test_captioned_Qwen_VL_Chat = pd.read_csv(
    '/content/drive/MyDrive/stance_detection_datasets/inferences/constraint22_dataset_uspolitics_test_captioned_Qwen-VL-Chat.csv'
)
constraint22_dataset_uspolitics_test_captioned_InternLM_XComposer = pd.read_csv(
    '/content/drive/MyDrive/stance_detection_datasets/inferences/constraint22_dataset_uspolitics_test_captioned_InternLM-XComposer.csv'
)
constraint22_dataset_uspolitics_test_captioned_llava = pd.read_csv(
    '/content/drive/MyDrive/stance_detection_datasets/inferences/constraint22_dataset_uspolitics_test_captioned_llava-v1.5-13b.csv'
)
constraint22_dataset_uspolitics_test_captioned_BLIP_2 = pd.read_csv(
    '/content/drive/MyDrive/stance_detection_datasets/inferences/constraint22_dataset_uspolitics_test_captioned_BLIP-2.csv'
)

In [None]:
# Drop every row that has a missing value in any column and renumber the rows
# from 0, independently for each of the four captioned datasets.
(
    constraint22_dataset_uspolitics_test_captioned_Qwen_VL_Chat,
    constraint22_dataset_uspolitics_test_captioned_InternLM_XComposer,
    constraint22_dataset_uspolitics_test_captioned_llava,
    constraint22_dataset_uspolitics_test_captioned_BLIP_2,
) = [
    frame.dropna().reset_index(drop=True)
    for frame in (
        constraint22_dataset_uspolitics_test_captioned_Qwen_VL_Chat,
        constraint22_dataset_uspolitics_test_captioned_InternLM_XComposer,
        constraint22_dataset_uspolitics_test_captioned_llava,
        constraint22_dataset_uspolitics_test_captioned_BLIP_2,
    )
]

# Set prompts and define a function to call the model

In [None]:
def prompt_vanilla(entity):
    """Build the role-classification prompt that relies on the image alone.

    Args:
        entity: Name of the entity whose role in the meme is being asked about.

    Returns:
        The prompt string, with the template's source indentation removed.
    """
    # Dedent the template *before* substituting the entity. Running cleandoc
    # on the already-filled string lets an entity that contains a line break
    # reduce the common margin to zero, which leaves every template line
    # indented by four spaces.
    template = inspect.cleandoc("""
    What is the role of {entity} in this meme?
    hero: presented in a positive light.
    villain: portrayed negatively, e.g., in an association with adverse traits like wickedness, cruelty, hypocrisy, etc.
    victim: portrayed as suffering the negative impact of someone else’s actions.
    other: not a hero, a villain, or a victim.
    Constraint: Without using any other words, answer either hero, villain, victim, other.""")
    return template.format(entity=entity)

In [None]:
def prompt_with_OCR(entity, OCR):
    """Build the role-classification prompt that also quotes the meme's text.

    Args:
        entity: Name of the entity whose role in the meme is being asked about.
        OCR: Text of the meme; inserted verbatim between triple quotes.

    Returns:
        The prompt string, with the template's source indentation removed.
    """
    # Dedent the template *before* substituting values. Running cleandoc on
    # the already-filled string lets a multi-line OCR text (whose continuation
    # lines start at column 0) reduce the common margin to zero, which leaves
    # every template line indented by four spaces.
    template = inspect.cleandoc("""
    Text on this meme: \"\"\"
    {OCR}
    \"\"\"
    What is the role of {entity} in this meme?
    hero: presented in a positive light.
    villain: portrayed negatively, e.g., in an association with adverse traits like wickedness, cruelty, hypocrisy, etc.
    victim: portrayed as suffering the negative impact of someone else’s actions.
    other: not a hero, a villain, or a victim.
    Constraint: Without using any other words, answer either hero, villain, victim, other.""")
    return template.format(entity=entity, OCR=OCR)

In [None]:
def prompt_with_caption(entity, caption):
    """Build the role-classification prompt that also quotes a meme description.

    Args:
        entity: Name of the entity whose role in the meme is being asked about.
        caption: Description of the meme; inserted verbatim between triple quotes.

    Returns:
        The prompt string, with the template's source indentation removed.
    """
    # Dedent the template *before* substituting values. Running cleandoc on
    # the already-filled string lets a multi-line caption (whose continuation
    # lines start at column 0) reduce the common margin to zero, which leaves
    # every template line indented by four spaces.
    template = inspect.cleandoc("""
    Description of this meme: \"\"\"
    {caption}
    \"\"\"
    What is the role of {entity} in this meme?
    hero: presented in a positive light.
    villain: portrayed negatively, e.g., in an association with adverse traits like wickedness, cruelty, hypocrisy, etc.
    victim: portrayed as suffering the negative impact of someone else’s actions.
    other: not a hero, a villain, or a victim.
    Constraint: Without using any other words, answer either hero, villain, victim, other.""")
    return template.format(entity=entity, caption=caption)

In [None]:
# Inference with InternLM-XComposer, following
# https://github.com/InternLM/InternLM-XComposer
def get_prediction(image, prompt):
    """Return the model's response to `prompt` for the given `image`.

    Thin wrapper around the notebook-level `model`; note that the argument
    order is swapped when forwarding to `model.generate(prompt, image)`.
    """
    return model.generate(prompt, image)

# Define function to clean responses

In [None]:
def remap(x):
    """Map a free-form model response onto one of the four role labels.

    A response maps to a label when it is exactly that label (ignoring case),
    or when it mentions exactly one of the labels and does not negate it
    ("not hero", "not a hero", "not an other", ...).

    Args:
        x: The raw model response.

    Returns:
        'hero', 'villain', 'victim' or 'other', or None when the response is
        ambiguous, negated, mentions no label, or is not a string.
    """
    # A missing response (None / NaN) is unparseable; do not crash on .lower().
    if not isinstance(x, str):
        return None
    text = x.lower()

    # hero / villain / victim are matched as plain substrings, so variants
    # such as "heroes" or "superhero" still count. 'other' must start a word:
    # otherwise "another", "brother" or "mother" would count as a mention of
    # the 'other' label and spoil an otherwise unambiguous answer.
    label_patterns = {
        'hero': r'hero',
        'villain': r'villain',
        'victim': r'victim',
        'other': r'\bother',
    }
    if text in label_patterns:
        return text

    mentioned = [
        label for label, pattern in label_patterns.items()
        if re.search(pattern, text)
    ]
    # Zero mentions or several competing mentions: cannot decide.
    if len(mentioned) != 1:
        return None

    label = mentioned[0]
    article = 'an' if label == 'other' else 'a'
    if f'not {label}' in text or f'not {article} {label}' in text:
        return None
    return label

# Call the `get_prediction` function and save inferences

GROUNDING [ABSENT] & PROMPT [VANILLA]

In [None]:
# Query the model with the vanilla prompt (image + entity only).
uspolitics_test_grounding_absent_prompt_vanilla = constraint22_dataset_uspolitics_test_captioned_Qwen_VL_Chat.copy(deep=True)
uspolitics_test_grounding_absent_prompt_vanilla_images = uspolitics_test_grounding_absent_prompt_vanilla['image'].values
uspolitics_test_grounding_absent_prompt_vanilla_entities = uspolitics_test_grounding_absent_prompt_vanilla['entity'].values
uspolitics_test_grounding_absent_prompt_vanilla['prediction'] = [
    get_prediction(image, prompt_vanilla(entity))
    for image, entity in zip(
        uspolitics_test_grounding_absent_prompt_vanilla_images,
        uspolitics_test_grounding_absent_prompt_vanilla_entities,
    )
]

# Save the raw responses before any clean-up.
uspolitics_test_grounding_absent_prompt_vanilla.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/constraint22_dataset_uspolitics_test_grounding[ABSENT]_prompt[VANILLA]_prediction[InternLM-XComposer].csv', index=False)

# Reduce each response to a role label (None when it cannot be parsed) and
# report the label distribution and the number of unparseable responses.
uspolitics_test_grounding_absent_prompt_vanilla['prediction'] = uspolitics_test_grounding_absent_prompt_vanilla['prediction'].apply(remap)
print(uspolitics_test_grounding_absent_prompt_vanilla['prediction'].value_counts())
print(uspolitics_test_grounding_absent_prompt_vanilla['prediction'].isna().sum())

# Unparseable responses receive a random label. A fixed-seed generator is used
# instead of the unseeded global np.random state so that the reported scores
# are identical on every run.
role_labels = ['hero', 'villain', 'victim', 'other']
fallback_rng = np.random.default_rng(0)
uspolitics_test_grounding_absent_prompt_vanilla['prediction'] = uspolitics_test_grounding_absent_prompt_vanilla['prediction'].apply(
    lambda label: fallback_rng.choice(role_labels) if pd.isna(label) else label
)

print(f1_score(uspolitics_test_grounding_absent_prompt_vanilla['role'].values, uspolitics_test_grounding_absent_prompt_vanilla['prediction'].values, labels=role_labels, average='macro'))
print(classification_report(uspolitics_test_grounding_absent_prompt_vanilla['role'].values, uspolitics_test_grounding_absent_prompt_vanilla['prediction'].values, labels=role_labels))

victim     375
other      268
villain    181
hero       171
Name: prediction, dtype: int64
0
0.32422961155392843
              precision    recall  f1-score   support

        hero       0.37      0.26      0.30       246
     villain       0.38      0.27      0.32       250
      victim       0.35      0.52      0.42       249
       other       0.25      0.27      0.26       250

    accuracy                           0.33       995
   macro avg       0.34      0.33      0.32       995
weighted avg       0.34      0.33      0.32       995



GROUNDING [ABSENT] & PROMPT [OCR]

In [None]:
# Query the model with the OCR prompt (image + entity + text of the 'OCR' column).
uspolitics_test_grounding_absent_prompt_with_OCR = constraint22_dataset_uspolitics_test_captioned_Qwen_VL_Chat.copy(deep=True)
uspolitics_test_grounding_absent_prompt_with_OCR_images = uspolitics_test_grounding_absent_prompt_with_OCR['image'].values
uspolitics_test_grounding_absent_prompt_with_OCR_entities = uspolitics_test_grounding_absent_prompt_with_OCR['entity'].values
uspolitics_test_grounding_absent_prompt_with_OCR_texts = uspolitics_test_grounding_absent_prompt_with_OCR['OCR'].values
uspolitics_test_grounding_absent_prompt_with_OCR['prediction'] = [
    get_prediction(image, prompt_with_OCR(entity, text))
    for image, entity, text in zip(
        uspolitics_test_grounding_absent_prompt_with_OCR_images,
        uspolitics_test_grounding_absent_prompt_with_OCR_entities,
        uspolitics_test_grounding_absent_prompt_with_OCR_texts,
    )
]

# Save the raw responses before any clean-up.
uspolitics_test_grounding_absent_prompt_with_OCR.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/constraint22_dataset_uspolitics_test_grounding[ABSENT]_prompt[OCR]_prediction[InternLM-XComposer].csv', index=False)

# Reduce each response to a role label (None when it cannot be parsed) and
# report the label distribution and the number of unparseable responses.
uspolitics_test_grounding_absent_prompt_with_OCR['prediction'] = uspolitics_test_grounding_absent_prompt_with_OCR['prediction'].apply(remap)
print(uspolitics_test_grounding_absent_prompt_with_OCR['prediction'].value_counts())
print(uspolitics_test_grounding_absent_prompt_with_OCR['prediction'].isna().sum())

# Unparseable responses receive a random label. A fixed-seed generator is used
# instead of the unseeded global np.random state so that the reported scores
# are identical on every run.
role_labels = ['hero', 'villain', 'victim', 'other']
fallback_rng = np.random.default_rng(0)
uspolitics_test_grounding_absent_prompt_with_OCR['prediction'] = uspolitics_test_grounding_absent_prompt_with_OCR['prediction'].apply(
    lambda label: fallback_rng.choice(role_labels) if pd.isna(label) else label
)

print(f1_score(uspolitics_test_grounding_absent_prompt_with_OCR['role'].values, uspolitics_test_grounding_absent_prompt_with_OCR['prediction'].values, labels=role_labels, average='macro'))
print(classification_report(uspolitics_test_grounding_absent_prompt_with_OCR['role'].values, uspolitics_test_grounding_absent_prompt_with_OCR['prediction'].values, labels=role_labels))

victim     721
villain    152
hero        80
other       42
Name: prediction, dtype: int64
0
0.30662418933582747
              precision    recall  f1-score   support

        hero       0.74      0.24      0.36       246
     villain       0.45      0.27      0.34       250
      victim       0.31      0.89      0.46       249
       other       0.24      0.04      0.07       250

    accuracy                           0.36       995
   macro avg       0.43      0.36      0.31       995
weighted avg       0.43      0.36      0.31       995



GROUNDING [ABSENT] & PROMPT [CAPTION] & CAPTION [Qwen_VL_Chat]

In [None]:
# Query the model with the caption prompt, using the 'caption' column of the
# Qwen-VL-Chat captioned dataset.
uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat = constraint22_dataset_uspolitics_test_captioned_Qwen_VL_Chat.copy(deep=True)
uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat_images = uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['image'].values
uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat_entities = uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['entity'].values
uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat_captions = uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['caption'].values
uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'] = [
    get_prediction(image, prompt_with_caption(entity, caption))
    for image, entity, caption in zip(
        uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat_images,
        uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat_entities,
        uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat_captions,
    )
]

# Save the raw responses before any clean-up.
uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/constraint22_dataset_uspolitics_test_grounding[ABSENT]_caption[Qwen-VL-Chat]_prompt[CAPTION]_prediction[InternLM-XComposer].csv', index=False)

# Reduce each response to a role label (None when it cannot be parsed) and
# report the label distribution and the number of unparseable responses.
uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'] = uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].apply(remap)
print(uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].value_counts())
print(uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].isna().sum())

# Unparseable responses receive a random label. A fixed-seed generator is used
# instead of the unseeded global np.random state so that the reported scores
# are identical on every run.
role_labels = ['hero', 'villain', 'victim', 'other']
fallback_rng = np.random.default_rng(0)
uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'] = uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].apply(
    lambda label: fallback_rng.choice(role_labels) if pd.isna(label) else label
)

print(f1_score(uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['role'].values, uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].values, labels=role_labels, average='macro'))
print(classification_report(uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['role'].values, uspolitics_test_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].values, labels=role_labels))

villain    341
victim     317
other      278
hero        59
Name: prediction, dtype: int64
0
0.33460766584259405
              precision    recall  f1-score   support

        hero       0.64      0.15      0.25       246
     villain       0.33      0.45      0.38       250
      victim       0.39      0.50      0.44       249
       other       0.25      0.28      0.27       250

    accuracy                           0.35       995
   macro avg       0.41      0.35      0.33       995
weighted avg       0.40      0.35      0.33       995



GROUNDING [ABSENT] & PROMPT [CAPTION] & CAPTION [InternLM_XComposer]

In [None]:
# Query the model with the caption prompt, using the 'caption' column of the
# InternLM-XComposer captioned dataset.
uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer = constraint22_dataset_uspolitics_test_captioned_InternLM_XComposer.copy(deep=True)
uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer_images = uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['image'].values
uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer_entities = uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['entity'].values
uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer_captions = uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['caption'].values
uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'] = [
    get_prediction(image, prompt_with_caption(entity, caption))
    for image, entity, caption in zip(
        uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer_images,
        uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer_entities,
        uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer_captions,
    )
]

# Save the raw responses before any clean-up.
uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/constraint22_dataset_uspolitics_test_grounding[ABSENT]_caption[InternLM-XComposer]_prompt[CAPTION]_prediction[InternLM-XComposer].csv', index=False)

# Reduce each response to a role label (None when it cannot be parsed) and
# report the label distribution and the number of unparseable responses.
uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'] = uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].apply(remap)
print(uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].value_counts())
print(uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].isna().sum())

# Unparseable responses receive a random label. A fixed-seed generator is used
# instead of the unseeded global np.random state so that the reported scores
# are identical on every run.
role_labels = ['hero', 'villain', 'victim', 'other']
fallback_rng = np.random.default_rng(0)
uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'] = uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].apply(
    lambda label: fallback_rng.choice(role_labels) if pd.isna(label) else label
)

print(f1_score(uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['role'].values, uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].values, labels=role_labels, average='macro'))
print(classification_report(uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['role'].values, uspolitics_test_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].values, labels=role_labels))

villain    401
victim     340
other      156
hero        98
Name: prediction, dtype: int64
0
0.38669640754108675
              precision    recall  f1-score   support

        hero       0.68      0.27      0.39       246
     villain       0.38      0.61      0.47       250
      victim       0.41      0.56      0.48       249
       other       0.28      0.17      0.21       250

    accuracy                           0.41       995
   macro avg       0.44      0.40      0.39       995
weighted avg       0.44      0.41      0.39       995



GROUNDING [ABSENT] & PROMPT [CAPTION] & CAPTION [llava]

In [None]:
# Query the model with the caption prompt, using the 'caption' column of the
# llava-v1.5-13b captioned dataset.
uspolitics_test_grounding_absent_prompt_with_caption_llava = constraint22_dataset_uspolitics_test_captioned_llava.copy(deep=True)
uspolitics_test_grounding_absent_prompt_with_caption_llava_images = uspolitics_test_grounding_absent_prompt_with_caption_llava['image'].values
uspolitics_test_grounding_absent_prompt_with_caption_llava_entities = uspolitics_test_grounding_absent_prompt_with_caption_llava['entity'].values
uspolitics_test_grounding_absent_prompt_with_caption_llava_captions = uspolitics_test_grounding_absent_prompt_with_caption_llava['caption'].values
uspolitics_test_grounding_absent_prompt_with_caption_llava['prediction'] = [
    get_prediction(image, prompt_with_caption(entity, caption))
    for image, entity, caption in zip(
        uspolitics_test_grounding_absent_prompt_with_caption_llava_images,
        uspolitics_test_grounding_absent_prompt_with_caption_llava_entities,
        uspolitics_test_grounding_absent_prompt_with_caption_llava_captions,
    )
]

# Save the raw responses before any clean-up.
uspolitics_test_grounding_absent_prompt_with_caption_llava.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/constraint22_dataset_uspolitics_test_grounding[ABSENT]_caption[llava-v1.5-13b]_prompt[CAPTION]_prediction[InternLM-XComposer].csv', index=False)

# Reduce each response to a role label (None when it cannot be parsed) and
# report the label distribution and the number of unparseable responses.
uspolitics_test_grounding_absent_prompt_with_caption_llava['prediction'] = uspolitics_test_grounding_absent_prompt_with_caption_llava['prediction'].apply(remap)
print(uspolitics_test_grounding_absent_prompt_with_caption_llava['prediction'].value_counts())
print(uspolitics_test_grounding_absent_prompt_with_caption_llava['prediction'].isna().sum())

# Unparseable responses receive a random label. A fixed-seed generator is used
# instead of the unseeded global np.random state so that the reported scores
# are identical on every run.
role_labels = ['hero', 'villain', 'victim', 'other']
fallback_rng = np.random.default_rng(0)
uspolitics_test_grounding_absent_prompt_with_caption_llava['prediction'] = uspolitics_test_grounding_absent_prompt_with_caption_llava['prediction'].apply(
    lambda label: fallback_rng.choice(role_labels) if pd.isna(label) else label
)

print(f1_score(uspolitics_test_grounding_absent_prompt_with_caption_llava['role'].values, uspolitics_test_grounding_absent_prompt_with_caption_llava['prediction'].values, labels=role_labels, average='macro'))
print(classification_report(uspolitics_test_grounding_absent_prompt_with_caption_llava['role'].values, uspolitics_test_grounding_absent_prompt_with_caption_llava['prediction'].values, labels=role_labels))

other      383
villain    296
victim     253
hero        63
Name: prediction, dtype: int64
0
0.3419922463754498
              precision    recall  f1-score   support

        hero       0.65      0.17      0.27       246
     villain       0.34      0.40      0.37       250
      victim       0.41      0.41      0.41       249
       other       0.27      0.41      0.32       250

    accuracy                           0.35       995
   macro avg       0.42      0.35      0.34       995
weighted avg       0.42      0.35      0.34       995



GROUNDING [ABSENT] & PROMPT [CAPTION] & CAPTION [BLIP-2]

In [None]:
# Query the model with the caption prompt, using the 'caption' column of the
# BLIP-2 captioned dataset.
uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2 = constraint22_dataset_uspolitics_test_captioned_BLIP_2.copy(deep=True)
uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2_images = uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['image'].values
uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2_entities = uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['entity'].values
uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2_captions = uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['caption'].values
uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['prediction'] = [
    get_prediction(image, prompt_with_caption(entity, caption))
    for image, entity, caption in zip(
        uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2_images,
        uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2_entities,
        uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2_captions,
    )
]

# Save the raw responses before any clean-up.
# NOTE(review): this file name spells the captioner "BLIP_2", whereas the
# input CSV is named "...captioned_BLIP-2.csv". The path is left unchanged in
# case other code reads it; confirm before renaming.
uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/constraint22_dataset_uspolitics_test_grounding[ABSENT]_caption[BLIP_2]_prompt[CAPTION]_prediction[InternLM-XComposer].csv', index=False)

# Reduce each response to a role label (None when it cannot be parsed) and
# report the label distribution and the number of unparseable responses.
uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['prediction'] = uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['prediction'].apply(remap)
print(uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['prediction'].value_counts())
print(uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['prediction'].isna().sum())

# Unparseable responses receive a random label. A fixed-seed generator is used
# instead of the unseeded global np.random state so that the reported scores
# are identical on every run.
role_labels = ['hero', 'villain', 'victim', 'other']
fallback_rng = np.random.default_rng(0)
uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['prediction'] = uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['prediction'].apply(
    lambda label: fallback_rng.choice(role_labels) if pd.isna(label) else label
)

print(f1_score(uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['role'].values, uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['prediction'].values, labels=role_labels, average='macro'))
print(classification_report(uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['role'].values, uspolitics_test_grounding_absent_prompt_with_caption_BLIP_2['prediction'].values, labels=role_labels))

other      467
victim     251
villain    194
hero        83
Name: prediction, dtype: int64
0
0.3537726002891141
              precision    recall  f1-score   support

        hero       0.54      0.18      0.27       246
     villain       0.38      0.30      0.33       250
      victim       0.44      0.44      0.44       249
       other       0.28      0.53      0.37       250

    accuracy                           0.36       995
   macro avg       0.41      0.36      0.35       995
weighted avg       0.41      0.36      0.35       995

