# Import libraries

In [None]:
!pip install bitsandbytes>=0.39.0 accelerate>=0.20.0
!pip install transformers

In [None]:
import inspect
import os
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

# Instantiate model

In [None]:
# BLIP-2 checkpoint used for inference, see
# https://huggingface.co/Salesforce/blip2-flan-t5-xxl
BLIP2_CHECKPOINT = "Salesforce/blip2-flan-t5-xxl"

# The processor prepares the image/text inputs and decodes generated ids.
processor = Blip2Processor.from_pretrained(BLIP2_CHECKPOINT)

# Load the weights in 8-bit and let the library decide device placement.
model = Blip2ForConditionalGeneration.from_pretrained(
    BLIP2_CHECKPOINT,
    device_map="auto",
    load_in_8bit=True,
)

# Mount drive

In [None]:
# Mount Google Drive at /content/drive; the dataset and output paths used in
# this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

# Create directory to store inferences

In [None]:
# Folder that receives the prediction CSVs; creating it is a no-op when it
# already exists.
PREDICTIONS_DIR = '/content/drive/MyDrive/stance_detection_datasets/inferences/predictions'
os.makedirs(PREDICTIONS_DIR, exist_ok=True)

# Import datasets

In [None]:
# Load the annotated memes and keep only rows whose pillar does not contain
# 'Others'. Rows with a missing pillar are dropped as well (na=True marks them
# as matches before the negation), then the index is renumbered from 0.
total_defense_memes = (
    pd.read_csv('/content/drive/MyDrive/stance_detection_datasets/total_defense_memes/total_defense_memes.csv')
    .loc[lambda memes: ~memes['pillar'].str.contains('Others', na=True)]
    .reset_index(drop=True)
)

In [None]:
# Read the caption files, one per captioning model, in a fixed order so each
# frame lands in the matching variable.
(
    total_defense_memes_captioned_Qwen_VL_Chat,
    total_defense_memes_captioned_InternLM_XComposer,
    total_defense_memes_captioned_llava,
    total_defense_memes_captioned_BLIP_2,
) = (
    pd.read_csv(caption_csv)
    for caption_csv in (
        '/content/drive/MyDrive/stance_detection_datasets/inferences/total_defense_memes_captioned_Qwen-VL-Chat.csv',
        '/content/drive/MyDrive/stance_detection_datasets/inferences/total_defense_memes_captioned_InternLM-XComposer.csv',
        '/content/drive/MyDrive/stance_detection_datasets/inferences/total_defense_memes_captioned_llava-v1.5-13b.csv',
        '/content/drive/MyDrive/stance_detection_datasets/inferences/total_defense_memes_captioned_BLIP-2.csv',
    )
)

In [None]:
# Left-join each caption frame onto the filtered memes by 'image', so every
# remaining meme row is kept even when a caption file has no entry for it.
(
    total_defense_memes_captioned_Qwen_VL_Chat,
    total_defense_memes_captioned_InternLM_XComposer,
    total_defense_memes_captioned_llava,
    total_defense_memes_captioned_BLIP_2,
) = (
    pd.merge(total_defense_memes, caption_frame, how='left', on=['image'])
    for caption_frame in (
        total_defense_memes_captioned_Qwen_VL_Chat,
        total_defense_memes_captioned_InternLM_XComposer,
        total_defense_memes_captioned_llava,
        total_defense_memes_captioned_BLIP_2,
    )
)

# Set prompts and define a function to call the model

In [None]:
def prompt_with_caption_goodneutralbad(pillar, pillar_w_definition, caption):
    """Build the stance prompt that embeds a meme caption.

    Args:
        pillar: Name of the pillar the stance is judged against.
        pillar_w_definition: Definition sentence inserted after the caption.
        caption: Text description of the meme; may span several lines.

    Returns:
        The prompt as a newline-separated string with no leading indentation.
    """
    # The lines are joined explicitly instead of dedenting an interpolated
    # template: dedenting after interpolation breaks as soon as the caption
    # contains a newline, because its unindented continuation lines set the
    # common margin to zero and every template line keeps its indentation.
    prompt_lines = [
        'Description of this meme: """',
        f"{caption}",
        '"""',
        f"{pillar_w_definition}",
        f"Indicate whether this meme is bad, neutral or good towards Singapore's {pillar}.",
        "Constraint: Without using any other words, answer either bad, neutral, good.",
    ]
    return "\n".join(prompt_lines)

In [None]:
# Use BLIP-2 for the inference, following
# https://huggingface.co/Salesforce/blip2-flan-t5-xxl
def get_prediction(image, prompt):
    """Generate BLIP-2's text answer to `prompt` for one image.

    Args:
        image: Path (or file object) that `PIL.Image.open` can read.
        prompt: Text passed to the processor together with the image.

    Returns:
        The first generated sequence decoded to a string, special tokens removed.
    """
    # Open inside a context manager so the file handle is released right away;
    # Pillow keeps it open for multi-frame files otherwise. `convert` returns an
    # independent in-memory image, so it remains usable after the file closes.
    with Image.open(image) as image_file:
        raw_image = image_file.convert('RGB')
    # NOTE(review): the prompt is passed without truncation — confirm that long
    # captions are acceptable for this model.
    inputs = processor(raw_image, prompt, return_tensors="pt").to("cuda")
    out = model.generate(**inputs, max_new_tokens=30)
    return processor.decode(out[0], skip_special_tokens=True)

# Define function to add definitions

In [None]:
def definition(x):
    """Return the definition sentence for pillar name `x`, or None if unknown."""
    pillar_definitions = (
        ('Military Defence', "Singapore's Military Defence: Strong and formidable defence force made up of Regulars and National Servicemen, and supported by the entire Singapore."),
        ('Civil Defence', "Singapore's Civil Defence: Collective effort of the Singaporean society to spot signs of threats, respond effectively and recover quickly from crisis."),
        ('Economic Defence', "Singapore's Economic Defence: Strong and resilient Singaporean economy that is globally competitive and able to bounce back from any crisis."),
        ('Social Defence', "Singapore's Social Defence: Bonds that unite Singaporeans, built on trust and understanding among people of different races and religions, living in harmony and looking out for one another."),
        ('Psychological Defence', "Singapore's Psychological Defence: The will and resolve to defend the Singaporean way of life and interests, the fighting spirit to overcome challenges together."),
        ('Digital Defence', "Singapore's Digital Defence: Being secure, alert and responsible online."),
    )
    # Compare in the listed order and stop at the first exact match.
    for pillar_name, pillar_text in pillar_definitions:
        if x == pillar_name:
            return pillar_text
    return None

In [None]:
# Add a 'pillar_w_definition' column to each captioned frame by looking up the
# definition sentence of the row's pillar.
for captioned_memes in (
    total_defense_memes_captioned_Qwen_VL_Chat,
    total_defense_memes_captioned_InternLM_XComposer,
    total_defense_memes_captioned_llava,
    total_defense_memes_captioned_BLIP_2,
):
    captioned_memes['pillar_w_definition'] = captioned_memes['pillar'].apply(definition)

# Define function to clean responses

In [None]:
def remap(x):
    """Map a raw model answer to a stance label.

    Args:
        x: Raw answer text. Matching is case-insensitive and substring-based.

    Returns:
        'Against', 'Neutral' or 'Supportive' when exactly one of the keywords
        'bad', 'neutral', 'good' occurs and is not preceded by 'not '.
        None otherwise, including when `x` is not a string.
    """
    # A non-string (None, or NaN from an empty CSV cell) cannot be parsed;
    # report it as unparseable instead of raising on `.lower()`.
    if not isinstance(x, str):
        return None
    text = x.lower()
    stance_by_keyword = {'bad': 'Against', 'neutral': 'Neutral', 'good': 'Supportive'}
    mentioned = [keyword for keyword in stance_by_keyword if keyword in text]
    # Answers naming several options (or none) are ambiguous.
    if len(mentioned) != 1:
        return None
    keyword = mentioned[0]
    # A negated keyword such as "not bad" does not count as that answer.
    if f'not {keyword}' in text:
        return None
    return stance_by_keyword[keyword]

# Call the `get_prediction` function and save inferences

GROUNDING [ABSENT] & PROMPT [CAPTION_GOODNEUTRALBAD] & CAPTION [Qwen_VL_Chat]

In [None]:
# Work on a deep copy so the captioned frame stays untouched.
total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat = total_defense_memes_captioned_Qwen_VL_Chat.copy(deep=True)
total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat_images = total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['image'].values
total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat_pillars = total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['pillar'].values
total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat_definitions = total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['pillar_w_definition'].values
total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat_captions = total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['caption'].values

# One model call per meme: build the caption prompt and collect the raw answer.
total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'] = [
    get_prediction(image, prompt_with_caption_goodneutralbad(pillar, pillar_w_definition, caption))
    for image, pillar, pillar_w_definition, caption in zip(
        total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat_images,
        total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat_pillars,
        total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat_definitions,
        total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat_captions,
    )
]

# Save the raw answers before they are mapped to stance labels.
total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/total_defense_memes_grounding[ABSENT]_caption[Qwen-VL-Chat]_prompt[CAPTION_GOODNEUTRALBAD]_prediction[BLIP-2].csv', index=False)

# Map raw answers to stance labels and report how many could not be parsed.
total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'] = total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].apply(remap)
print(total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].value_counts())
print(total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].isna().sum())

# Unparseable answers get a random label; a seeded generator keeps the
# reported metrics reproducible across runs.
fallback_rng = np.random.default_rng(0)
total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'] = total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].apply(lambda x: x if x is not None else fallback_rng.choice(['Against', 'Neutral', 'Supportive']))

# Macro F1 and the per-class report against the annotated stance.
print(f1_score(total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['stance'].values, total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].values, labels=['Against', 'Neutral', 'Supportive'], average='macro'))
print(classification_report(total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['stance'].values, total_defense_memes_grounding_absent_prompt_with_caption_Qwen_VL_Chat['prediction'].values, labels=['Against', 'Neutral', 'Supportive']))

Neutral       488
Against       208
Supportive     75
Name: prediction, dtype: int64
0
0.5404905292439168
              precision    recall  f1-score   support

     Against       0.72      0.43      0.54       352
     Neutral       0.55      0.81      0.65       332
  Supportive       0.47      0.40      0.43        87

    accuracy                           0.59       771
   macro avg       0.58      0.55      0.54       771
weighted avg       0.62      0.59      0.57       771



GROUNDING [ABSENT] & PROMPT [CAPTION_GOODNEUTRALBAD] & CAPTION [InternLM_XComposer]

In [None]:
# Work on a deep copy so the captioned frame stays untouched.
total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer = total_defense_memes_captioned_InternLM_XComposer.copy(deep=True)
total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer_images = total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['image'].values
total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer_pillars = total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['pillar'].values
total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer_definitions = total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['pillar_w_definition'].values
total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer_captions = total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['caption'].values

# One model call per meme: build the caption prompt and collect the raw answer.
total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'] = [
    get_prediction(image, prompt_with_caption_goodneutralbad(pillar, pillar_w_definition, caption))
    for image, pillar, pillar_w_definition, caption in zip(
        total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer_images,
        total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer_pillars,
        total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer_definitions,
        total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer_captions,
    )
]

# Save the raw answers before they are mapped to stance labels.
total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/total_defense_memes_grounding[ABSENT]_caption[InternLM-XComposer]_prompt[CAPTION_GOODNEUTRALBAD]_prediction[BLIP-2].csv', index=False)

# Map raw answers to stance labels and report how many could not be parsed.
total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'] = total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].apply(remap)
print(total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].value_counts())
print(total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].isna().sum())

# Unparseable answers get a random label; a seeded generator keeps the
# reported metrics reproducible across runs.
fallback_rng = np.random.default_rng(0)
total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'] = total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].apply(lambda x: x if x is not None else fallback_rng.choice(['Against', 'Neutral', 'Supportive']))

# Macro F1 and the per-class report against the annotated stance.
print(f1_score(total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['stance'].values, total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].values, labels=['Against', 'Neutral', 'Supportive'], average='macro'))
print(classification_report(total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['stance'].values, total_defense_memes_grounding_absent_prompt_with_caption_InternLM_XComposer['prediction'].values, labels=['Against', 'Neutral', 'Supportive']))

Token indices sequence length is longer than the specified maximum sequence length for this model (578 > 512). Running this sequence through the model will result in indexing errors


Neutral       549
Against       155
Supportive     67
Name: prediction, dtype: int64
0
0.44649948264721684
              precision    recall  f1-score   support

     Against       0.64      0.28      0.39       352
     Neutral       0.50      0.83      0.62       332
  Supportive       0.37      0.29      0.32        87

    accuracy                           0.52       771
   macro avg       0.50      0.47      0.45       771
weighted avg       0.55      0.52      0.48       771



GROUNDING [ABSENT] & PROMPT [CAPTION_GOODNEUTRALBAD] & CAPTION [llava]

In [None]:
# Work on a deep copy so the captioned frame stays untouched.
total_defense_memes_grounding_absent_prompt_with_caption_llava = total_defense_memes_captioned_llava.copy(deep=True)
total_defense_memes_grounding_absent_prompt_with_caption_llava_images = total_defense_memes_grounding_absent_prompt_with_caption_llava['image'].values
total_defense_memes_grounding_absent_prompt_with_caption_llava_pillars = total_defense_memes_grounding_absent_prompt_with_caption_llava['pillar'].values
total_defense_memes_grounding_absent_prompt_with_caption_llava_definitions = total_defense_memes_grounding_absent_prompt_with_caption_llava['pillar_w_definition'].values
total_defense_memes_grounding_absent_prompt_with_caption_llava_captions = total_defense_memes_grounding_absent_prompt_with_caption_llava['caption'].values

# One model call per meme: build the caption prompt and collect the raw answer.
total_defense_memes_grounding_absent_prompt_with_caption_llava['prediction'] = [
    get_prediction(image, prompt_with_caption_goodneutralbad(pillar, pillar_w_definition, caption))
    for image, pillar, pillar_w_definition, caption in zip(
        total_defense_memes_grounding_absent_prompt_with_caption_llava_images,
        total_defense_memes_grounding_absent_prompt_with_caption_llava_pillars,
        total_defense_memes_grounding_absent_prompt_with_caption_llava_definitions,
        total_defense_memes_grounding_absent_prompt_with_caption_llava_captions,
    )
]

# Save the raw answers before they are mapped to stance labels.
total_defense_memes_grounding_absent_prompt_with_caption_llava.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/total_defense_memes_grounding[ABSENT]_caption[llava-v1.5-13b]_prompt[CAPTION_GOODNEUTRALBAD]_prediction[BLIP-2].csv', index=False)

# Map raw answers to stance labels and report how many could not be parsed.
total_defense_memes_grounding_absent_prompt_with_caption_llava['prediction'] = total_defense_memes_grounding_absent_prompt_with_caption_llava['prediction'].apply(remap)
print(total_defense_memes_grounding_absent_prompt_with_caption_llava['prediction'].value_counts())
print(total_defense_memes_grounding_absent_prompt_with_caption_llava['prediction'].isna().sum())

# Unparseable answers get a random label; a seeded generator keeps the
# reported metrics reproducible across runs.
fallback_rng = np.random.default_rng(0)
total_defense_memes_grounding_absent_prompt_with_caption_llava['prediction'] = total_defense_memes_grounding_absent_prompt_with_caption_llava['prediction'].apply(lambda x: x if x is not None else fallback_rng.choice(['Against', 'Neutral', 'Supportive']))

# Macro F1 and the per-class report against the annotated stance.
print(f1_score(total_defense_memes_grounding_absent_prompt_with_caption_llava['stance'].values, total_defense_memes_grounding_absent_prompt_with_caption_llava['prediction'].values, labels=['Against', 'Neutral', 'Supportive'], average='macro'))
print(classification_report(total_defense_memes_grounding_absent_prompt_with_caption_llava['stance'].values, total_defense_memes_grounding_absent_prompt_with_caption_llava['prediction'].values, labels=['Against', 'Neutral', 'Supportive']))

Neutral       604
Supportive     86
Against        81
Name: prediction, dtype: int64
0
0.41329464555645656
              precision    recall  f1-score   support

     Against       0.78      0.18      0.29       352
     Neutral       0.49      0.90      0.64       332
  Supportive       0.31      0.31      0.31        87

    accuracy                           0.50       771
   macro avg       0.53      0.46      0.41       771
weighted avg       0.60      0.50      0.44       771



GROUNDING [ABSENT] & PROMPT [CAPTION_GOODNEUTRALBAD] & CAPTION [BLIP-2]

In [None]:
# Work on a deep copy so the captioned frame stays untouched.
total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2 = total_defense_memes_captioned_BLIP_2.copy(deep=True)
total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2_images = total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['image'].values
total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2_pillars = total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['pillar'].values
total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2_definitions = total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['pillar_w_definition'].values
total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2_captions = total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['caption'].values

# One model call per meme: build the caption prompt and collect the raw answer.
total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['prediction'] = [
    get_prediction(image, prompt_with_caption_goodneutralbad(pillar, pillar_w_definition, caption))
    for image, pillar, pillar_w_definition, caption in zip(
        total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2_images,
        total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2_pillars,
        total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2_definitions,
        total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2_captions,
    )
]

# Save the raw answers before they are mapped to stance labels.
# NOTE(review): this file name uses "caption[BLIP_2]" (underscore) while the
# caption input file and the "prediction[BLIP-2]" tag use a hyphen — confirm
# which spelling downstream readers expect before renaming.
total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2.to_csv('/content/drive/MyDrive/stance_detection_datasets/inferences/predictions/total_defense_memes_grounding[ABSENT]_caption[BLIP_2]_prompt[CAPTION_GOODNEUTRALBAD]_prediction[BLIP-2].csv', index=False)

# Map raw answers to stance labels and report how many could not be parsed.
total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['prediction'] = total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['prediction'].apply(remap)
print(total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['prediction'].value_counts())
print(total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['prediction'].isna().sum())

# Unparseable answers get a random label; a seeded generator keeps the
# reported metrics reproducible across runs.
fallback_rng = np.random.default_rng(0)
total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['prediction'] = total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['prediction'].apply(lambda x: x if x is not None else fallback_rng.choice(['Against', 'Neutral', 'Supportive']))

# Macro F1 and the per-class report against the annotated stance.
print(f1_score(total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['stance'].values, total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['prediction'].values, labels=['Against', 'Neutral', 'Supportive'], average='macro'))
print(classification_report(total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['stance'].values, total_defense_memes_grounding_absent_prompt_with_caption_BLIP_2['prediction'].values, labels=['Against', 'Neutral', 'Supportive']))

Neutral       567
Against       124
Supportive     80
Name: prediction, dtype: int64
0
0.41696640556670933
              precision    recall  f1-score   support

     Against       0.63      0.22      0.33       352
     Neutral       0.49      0.83      0.61       332
  Supportive       0.33      0.30      0.31        87

    accuracy                           0.49       771
   macro avg       0.48      0.45      0.42       771
weighted avg       0.53      0.49      0.45       771

