# Install dependencies and import libraries

In [None]:
!pip install transformers==4.32.0 accelerate tiktoken einops transformers_stream_generator==0.0.4 scipy torchvision pillow tensorboard matplotlib

In [None]:
import os
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
import torch
# Seed PyTorch's global RNG so that any sampling performed during generation is repeatable between runs.
torch.manual_seed(1)

# Instantiate model

In [None]:
# Instantiate the Qwen-VL-Chat tokenizer and model. Reference:
# https://huggingface.co/Qwen/Qwen-VL-Chat
# NOTE(review): trust_remote_code=True executes Python code shipped with the checkpoint repository;
# consider pinning a reviewed `revision=` so later pushes to that repo cannot change what runs here.
MODEL_ID = "Qwen/Qwen-VL-Chat"  # one constant so the tokenizer and the model always come from the same checkpoint
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# .eval() puts the loaded model in evaluation mode; device_map="cuda" places it on the GPU.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cuda", trust_remote_code=True).eval()

# Mount drive

In [None]:
# Colab-only: mount Google Drive at /content/drive so the dataset and output paths used in the
# following cells (all under /content/drive/MyDrive/...) resolve.
from google.colab import drive
drive.mount('/content/drive')

# Create directories to store inferences and images

In [None]:
# Make sure both output folders exist on Drive; exist_ok=True keeps this cell safe to re-run.
for _needed_dir in (
    '/content/drive/MyDrive/stance_detection_datasets/inferences',
    '/content/drive/MyDrive/stance_detection_datasets/images_with_grounding',
):
    os.makedirs(_needed_dir, exist_ok=True)

# Import datasets

In [None]:
# Load the three test CSVs from Drive, one DataFrame per dataset.
constraint22_dataset_uspolitics_test = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/stance_detection_datasets/constraint22_dataset_uspolitics/constraint22_dataset_uspolitics_test.csv',
)
constrain22_dataset_covid19_test = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/stance_detection_datasets/constrain22_dataset_covid19/constrain22_dataset_covid19_test.csv',
)
DISARM_test_all = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/stance_detection_datasets/DISARM/DISARM_test_all.csv',
)

# Balanced sampling

In [None]:
def _sample_per_role(frame, per_role):
    """Draw `per_role` rows from every 'role' group of `frame` (fixed seed) and renumber the index."""
    drawn = frame.groupby('role').sample(n=per_role, random_state=1)
    return drawn.reset_index(drop=True)


# Replace each frame with its per-role balanced sample (250 rows per role / 190 rows per role).
constraint22_dataset_uspolitics_test = _sample_per_role(constraint22_dataset_uspolitics_test, 250)
constrain22_dataset_covid19_test = _sample_per_role(constrain22_dataset_covid19_test, 190)

# Set prompt and define a function to call the model

In [None]:
# Use Qwen-VL-Chat for inference. Reference:
# https://huggingface.co/Qwen/Qwen-VL-Chat
def draw_bounding_boxes(df, target, prefix,
                        output_dir='/content/drive/MyDrive/stance_detection_datasets/images_with_grounding'):
    """Ask the model to ground each row's entity in its image and save the annotated picture.

    For every row, a query is built from the image at ``row['image']`` and the prompt
    ``'Find {entity} only, with grounding:'`` (``entity`` being ``row[target]``) and sent to
    ``model.chat`` with a fresh history. If ``tokenizer.draw_bbox_on_latest_picture`` returns an
    image, it is saved as ``<output_dir>/<prefix><k>.png``, where ``k`` counts the images saved
    so far (successful groundings, not row positions). Otherwise ``'failed'`` is printed and the
    row keeps its original image path.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        df: DataFrame with an ``'image'`` column and a ``target`` column. It is modified in
            place: an ``'image_with_grounding'`` column is added (or overwritten).
        target: Name of the column holding the entity to locate.
        prefix: File-name prefix for the saved images.
        output_dir: Directory that receives the annotated images; created if it does not
            exist. Defaults to the Drive folder this notebook has always written to.

    Returns:
        The same ``df`` object, with ``'image_with_grounding'`` holding, per row, either the
        path of the saved annotated image or the original ``'image'`` value.
    """
    # Creating the folder here lets the function be called without a separate setup cell.
    os.makedirs(output_dir, exist_ok=True)

    grounded_paths = []
    saved_count = 0
    # Only two columns are needed, so iterate over them directly instead of materialising a
    # Series per row with iterrows().
    for image_path, entity in zip(df['image'], df[target]):
        query = tokenizer.from_list_format([
            {'image': image_path},
            {'text': f'Find {entity} only, with grounding:'},
        ])
        # history=None: every row is an independent single-turn conversation.
        response, history = model.chat(tokenizer, query=query, history=None)
        image_with_grounding = tokenizer.draw_bbox_on_latest_picture(response, history)
        if image_with_grounding is None:
            # Nothing to draw for this row: fall back to the unannotated image.
            print('failed')
            grounded_paths.append(image_path)
            continue
        saved_path = os.path.join(output_dir, prefix + str(saved_count) + '.png')
        image_with_grounding.save(saved_path)
        grounded_paths.append(saved_path)
        saved_count += 1

    df['image_with_grounding'] = grounded_paths
    return df

# Call the `draw_bounding_boxes` function and save inferences

In [None]:
# Ground the 'entity' column of the constraint22 US-politics test frame, then write the result to Drive.
constraint22_dataset_uspolitics_test = draw_bounding_boxes(
    df=constraint22_dataset_uspolitics_test,
    target='entity',
    prefix='constraint22_dataset_uspolitics_test_',
)
constraint22_dataset_uspolitics_test.to_csv(
    path_or_buf='/content/drive/MyDrive/stance_detection_datasets/inferences/constraint22_dataset_uspolitics_test_grounded_Qwen-VL-Chat.csv',
    index=False,
)

In [None]:
# Ground the 'entity' column of the constrain22 COVID-19 test frame, then write the result to Drive.
constrain22_dataset_covid19_test = draw_bounding_boxes(
    df=constrain22_dataset_covid19_test,
    target='entity',
    prefix='constrain22_dataset_covid19_test_',
)
constrain22_dataset_covid19_test.to_csv(
    path_or_buf='/content/drive/MyDrive/stance_detection_datasets/inferences/constrain22_dataset_covid19_test_grounded_Qwen-VL-Chat.csv',
    index=False,
)

In [None]:
# Ground the 'target' column of the DISARM test frame, then write the result to Drive.
DISARM_test_all = draw_bounding_boxes(
    df=DISARM_test_all,
    target='target',
    prefix='DISARM_test_all_',
)
DISARM_test_all.to_csv(
    path_or_buf='/content/drive/MyDrive/stance_detection_datasets/inferences/DISARM_test_all_grounded_Qwen-VL-Chat.csv',
    index=False,
)