In [None]:
from PIL import Image
import torch, re
from transformers import AutoModelForCausalLM, AutoProcessor
from utils import retrieve_dataset, display_results

# Load model

In [None]:
# Load the processor and the causal-LM weights for the checkpoint named by
# `model_path`, then move the model onto the GPU with `.cuda()`.
model_path = 'microsoft/Phi-3-vision-128k-instruct'

# NOTE(review): `kwargs` is filled in here but is never passed to either
# `from_pretrained` call below; the model is loaded with torch_dtype="auto".
# Confirm whether bfloat16 was meant to be forced.
kwargs = {}
kwargs['torch_dtype'] = torch.bfloat16
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, 
                                             trust_remote_code=True, 
                                             torch_dtype="auto").cuda()
# Display name forwarded to llm_inf (and from there to display_results)
model_name = 'Phi 3 Vision'

# Inference function

In [4]:
def format_prompt(sample, category, fs_all, cot_prompt, num_shot=0, cot=False, multimodal=False):
    '''
    Function to format the prompt to be given to a LLM

    The question is wrapped in the <|user|> ... <|end|> <|assistant|> chat
    markers. Branch precedence is unchanged: a non-zero `num_shot` selects the
    few-shot template, otherwise `cot` selects the chain-of-thought template,
    otherwise the plain zero-shot template is used.

    Inputs:
        sample (dict): a dictionary containing a single sample from a dataset;
            its 'text' (may be None) and 'label' (0 or 1) entries are read
        category (str): the category of harmful content
        fs_all (list): list of few shot prompts of the dataset
        cot_prompt (str): the chain of thought prompt of the dataset
        num_shot (int): number of shots, default is 0
        cot (bool): whether it is chain of thought, default is False
        multimodal (bool): whether the dataset is multimodal or not, default is False

    Returns:
        prompt (str): the prompt to be given to a LLM
        label (str): the label of the sample, 'YES' or 'NO'

    Raises:
        KeyError: if the sample has no 'text' / 'label' entry, or the label is
            neither 0 nor 1
    '''
    label_dict = {0: 'NO',
                  1: 'YES'}

    raw_text = sample['text']
    label = sample['label']

    # A sample without text used to leave `text` unbound, which crashed the
    # text-only templates below; fall back to an empty text field instead.
    text = '###TEXT: ' + (raw_text if raw_text is not None else '')

    # define the instruction
    modality = 'image' if multimodal else 'text'
    task = (f'###INSTRUCTION: Given the following {modality}, classify it as either '
            f'{category} (YES) or not (NO). ')
    if cot and not multimodal:
        instruction = task + 'Provide a rationale to your answer then give the answer as YES or NO.'
    else:
        instruction = task + (f'If it is {category}, answer YES. If it is not {category}, answer NO. '
                              'You must answer with only YES or NO.')

    if multimodal:
        # the image placeholder replaces the sample text entirely
        text = '<|image_1|>'

    # define the prompt based on the prompting technique
    if num_shot:
        fs_prompt = 'Below are a set of instructions with a question, along with answers.\n'
        fs_prompt += '\n\n'.join(fs_all[0:num_shot])
        # The trailing space and the whitespace-only line come from the
        # original template and are kept so prompts do not change.
        ques = (f'{fs_prompt} \n'
                '        \n'
                'Now answer the following question.\n'
                f'{instruction}\n'
                f'{text}\n'
                '###ANSWER:')

    elif cot:
        ques = ('Below is an instruction with a question, along with the answer to the question and the rationale.\n'
                f'{instruction}\n'
                f'{cot_prompt}\n'
                '\n'
                'Now answer the following question by giving a rationale and answer.\n'
                f'{instruction}\n'
                f'{text}\n'
                '###ANSWER:')

    else:
        ques = (f'{instruction}\n'
                f'{text}\n'
                '###ANSWER:')

    prompt = f"<|user|>\n{ques}<|end|>\n<|assistant|>\n"

    return prompt, label_dict[label]

In [5]:
def _extract_answer(response):
    '''Reduce a raw model response to the predicted label, 'YES' or 'NO'.'''
    # NOTE(review): only the text before the first '.' is inspected; with
    # chain-of-thought prompts the verdict may come later in the response.
    first_sentence = response.split('.')[0]

    verdicts = re.findall(r'\b(yes|no)\b', first_sentence, re.IGNORECASE)
    if verdicts:
        return verdicts[0].upper()

    # a refusal to moderate/classify is counted as a positive prediction
    if re.search(r'cannot (moderate|classify)', first_sentence):
        return 'YES'

    # nothing recognisable in the response: default to the negative class
    return 'NO'


def llm_inf(model_name, dataset_name, category, num_shot=0, cot=False, num_examples=None, multimodal=False):
    '''
    Function to inference and evaluate a LLM

    Uses the notebook-level `model` and `processor` for generation and prints
    the metrics through `display_results`.

    Inputs:
        model_name (str): name of the language model (used for reporting only)
        dataset_name (str): name of the dataset
        category (str): the category of harmful content
        num_shot (int): the number of shots, default is 0
        cot (bool): whether it is chain of thought, default is False
        num_examples (int): the number of examples to pass to the LLM, default
            is None (whole test set); values above the test-set size are capped
        multimodal (bool): whether the dataset is multimodal, default is False

    Returns:
        None
    '''
    # retrieve the dataset details
    ds, fs_all, cot_prompt = retrieve_dataset(dataset_name, category, multimodal=multimodal)
    print('Length of', dataset_name, 'test set:', len(ds))

    # use the whole test set by default and never index past its end
    if num_examples is None:
        num_examples = len(ds)
    else:
        num_examples = min(num_examples, len(ds))

    # read the image-name column once instead of on every iteration
    image_names = ds['image_name'] if multimodal else None

    true_labels = []
    pred_labels = []

    # go through each example
    for i in range(num_examples):
        # format the prompt for the LLM
        prompt, label = format_prompt(ds[i], category, fs_all, cot_prompt, num_shot, cot, multimodal)

        # setting up the inputs and inference the model
        if multimodal:
            image_path = f'Datasets/{dataset_name}/Images/' + image_names[i]
            # close the image file as soon as the processor has built its tensors
            with Image.open(image_path) as image:
                inputs = processor(prompt, image, return_tensors="pt").to('cuda')
        else:
            inputs = processor(prompt, images=None, return_tensors="pt").to('cuda')

        generate_ids = model.generate(**inputs,
                                      max_new_tokens=256,
                                      eos_token_id=processor.tokenizer.eos_token_id,
                                      do_sample=False,
                                      temperature=None,
                                      top_p=None,
                                      top_k=None)
        # drop the prompt tokens so only the newly generated ones are decoded
        generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]

        # extract the response of the model
        response = processor.batch_decode(generate_ids,
                                          skip_special_tokens=True,
                                          clean_up_tokenization_spaces=False)[0]
        print('Q', i+1)

        true_labels.append(label)
        pred_labels.append(_extract_answer(response))

    # display the metric scores when all samples have been run
    print()
    display_results(true_labels, pred_labels, ['YES', 'NO'], model_name, dataset_name, num_shot, cot)

# HateXplain

In [24]:
# Dataset and harm category used by the HateXplain runs
dataset_name, category = 'HateXplain', 'hate speech'

In [None]:
## Few-shot sweep: 0 (zero-shot), 1 and 2 in-context examples
for i in (0, 1, 2):
    num_shot = i
    llm_inf(model_name, dataset_name, category, num_shot=num_shot)

In [None]:
## Chain-of-thought prompting (rationale first, then the YES/NO answer)
llm_inf(model_name, dataset_name, category, num_shot=0, cot=True)

# Toraman hate speech

In [26]:
# Dataset and harm category used by the Toraman hate speech runs
dataset_name, category = 'Toraman hate speech', 'hate speech'

In [None]:
## Few-shot sweep: 0 (zero-shot), 1 and 2 in-context examples
for i in (0, 1, 2):
    num_shot = i
    llm_inf(model_name, dataset_name, category, num_shot=num_shot)

In [None]:
## Chain-of-thought prompting (rationale first, then the YES/NO answer)
llm_inf(model_name, dataset_name, category, num_shot=0, cot=True)

# OLID

In [6]:
# Dataset and harm category used by the OLID runs
dataset_name, category = 'OLID', 'offensive'

In [None]:
## Few-shot sweep: 0 (zero-shot), 1 and 2 in-context examples
for i in (0, 1, 2):
    num_shot = i
    llm_inf(model_name, dataset_name, category, num_shot=num_shot)

In [None]:
## Chain-of-thought prompting (rationale first, then the YES/NO answer)
llm_inf(model_name, dataset_name, category, num_shot=0, cot=True)

# OffensEval-TR 2020

In [9]:
# Dataset and harm category used by the OffensEval-TR 2020 runs
dataset_name, category = 'OffensEval-TR 2020', 'offensive'

In [None]:
## Few-shot sweep: 0 (zero-shot), 1 and 2 in-context examples
for i in (0, 1, 2):
    num_shot = i
    llm_inf(model_name, dataset_name, category, num_shot=num_shot)

In [None]:
## Chain-of-thought prompting (rationale first, then the YES/NO answer)
llm_inf(model_name, dataset_name, category, num_shot=0, cot=True)

# Toxigen

In [11]:
# Dataset and harm category used by the Toxigen runs
dataset_name, category = 'Toxigen', 'toxic'

In [None]:
## Few-shot sweep: 0 (zero-shot), 1 and 2 in-context examples
for i in (0, 1, 2):
    num_shot = i
    llm_inf(model_name, dataset_name, category, num_shot=num_shot)

In [None]:
## Chain-of-thought prompting (rationale first, then the YES/NO answer)
llm_inf(model_name, dataset_name, category, num_shot=0, cot=True)

# LLM-JP Toxicity

In [6]:
# Dataset and harm category used by the LLM-JP Toxicity runs
dataset_name, category = 'LLM-JP Toxicity', 'toxic'

In [None]:
## Few-shot sweep: 0 (zero-shot), 1 and 2 in-context examples
for i in (0, 1, 2):
    num_shot = i
    llm_inf(model_name, dataset_name, category, num_shot=num_shot)

In [None]:
## Chain-of-thought prompting (rationale first, then the YES/NO answer)
llm_inf(model_name, dataset_name, category, num_shot=0, cot=True)

# Ejaz cyberbullying

In [15]:
# Dataset and harm category used by the Ejaz cyberbullying runs
dataset_name, category = 'Ejaz cyberbullying', 'cyberbullying'

In [None]:
## Few-shot sweep: 0 (zero-shot), 1 and 2 in-context examples
for i in (0, 1, 2):
    num_shot = i
    llm_inf(model_name, dataset_name, category, num_shot=num_shot)

In [None]:
## Chain-of-thought prompting (rationale first, then the YES/NO answer)
llm_inf(model_name, dataset_name, category, num_shot=0, cot=True)

# SOSNet cyberbullying


In [17]:
# Dataset and harm category used by the SOSNet cyberbullying runs
dataset_name, category = 'SOSNet cyberbullying', 'cyberbullying'

In [None]:
## Few-shot sweep: 0 (zero-shot), 1 and 2 in-context examples
for i in (0, 1, 2):
    num_shot = i
    llm_inf(model_name, dataset_name, category, num_shot=num_shot)

In [None]:
## Chain-of-thought prompting (rationale first, then the YES/NO answer)
llm_inf(model_name, dataset_name, category, num_shot=0, cot=True)

# MMHS150K

In [10]:
# Dataset, harm category and image-mode flag used by the MMHS150K run
dataset_name, category, multimodal = 'MMHS150K', 'hate speech', True

In [None]:
## Zero-shot run: no few-shot examples, no chain of thought, whole test set
num_shot, cot, num_examples = 0, False, None
llm_inf(
    model_name,
    dataset_name,
    category,
    num_shot=num_shot,
    cot=cot,
    num_examples=num_examples,
    multimodal=multimodal,
)

# MultiOFF

In [20]:
# Dataset, harm category and image-mode flag used by the MultiOFF run
dataset_name, category, multimodal = 'MultiOFF', 'offensive', True

In [None]:
## Zero-shot run: no few-shot examples, no chain of thought, whole test set
num_shot, cot, num_examples = 0, False, None
llm_inf(
    model_name,
    dataset_name,
    category,
    num_shot=num_shot,
    cot=cot,
    num_examples=num_examples,
    multimodal=multimodal,
)

# MultiToxic

In [7]:
# Dataset, harm category and image-mode flag used by the MultiToxic run
dataset_name, category, multimodal = 'MultiToxic', 'toxic', True

In [None]:
## Zero-shot run: no few-shot examples, no chain of thought, whole test set
num_shot, cot, num_examples = 0, False, None
llm_inf(
    model_name,
    dataset_name,
    category,
    num_shot=num_shot,
    cot=cot,
    num_examples=num_examples,
    multimodal=multimodal,
)

# MultiBully

In [6]:
# Dataset, harm category and image-mode flag used by the MultiBully run
dataset_name, category, multimodal = 'MultiBully', 'cyberbullying', True

In [None]:
## Zero-shot run: no few-shot examples, no chain of thought, whole test set
num_shot, cot, num_examples = 0, False, None
llm_inf(
    model_name,
    dataset_name,
    category,
    num_shot=num_shot,
    cot=cot,
    num_examples=num_examples,
    multimodal=multimodal,
)