In [None]:
!pip install -q gdown datasets==2.17.0
!pip install -q transformers
print("transformers")
!pip install -q accelerate
print("accelerate")
!pip install -q torch
print("torch")
!pip install -q safetensors
print("safetensors")
!pip install -q xformers
print("xformers")
!pip install -q langchain==0.1.6
print("langchain")

In [None]:
# Directory that holds every file this notebook writes.
BASE_PATH = "./SE2024/csqa"

# Records are appended to OUTPUT_DATA_PATH one at a time during inference;
# OUTPUT_BACKUP_PATH receives the full result list once the loop finishes.
OUTPUT_DATA_PATH = BASE_PATH + "/inference_data.jsonl"
OUTPUT_BACKUP_PATH = BASE_PATH + "/inference_data_backup.jsonl"

In [None]:
import os

# Make sure the output directory exists before anything is written to it;
# exist_ok=True keeps this cell safe to re-run.
os.makedirs(name=BASE_PATH, exist_ok=True)

# Prepare dataset

In [None]:
from datasets import load_dataset

In [None]:
# Take the first 150 validation examples. Slicing a Dataset yields a plain
# dict that maps each column name to a list of values.
df = load_dataset("tau/commonsense_qa", split='validation')[:150]

# The `choices` column may come back either as one dict of columns or as a
# list of per-example dicts, depending on how the struct feature is
# formatted; accept both layouts when pulling out the option texts.
_choices = df['choices']
if isinstance(_choices, dict):
    _choice_texts = _choices['text']
else:
    _choice_texts = [choice['text'] for choice in _choices]

# Materialise as a list: a bare zip() is a one-shot iterator, so re-running
# the inference cell would otherwise silently iterate over nothing.
itr = list(zip(df['question'], _choice_texts, df['answerKey']))

# Prompt Template

In [None]:
from langchain import PromptTemplate,  LLMChain
from langchain import HuggingFacePipeline

# Prompt for answering a CSQA question directly from its five options.
# A trailing backslash joins a line with the next one, so each instruction
# paragraph reaches the model as a single line.
template = """ \
I would provide you a question and five options. \
The question is designed in common sense reasoning evaluation format and to answer question, \
you need to choose the best option that is related to the question and is logical. \
You may need to think of the problem from another perspective to find the best answer.

Question: "{question}"

Option 1: "{option_1}"
Option 2: "{option_2}"
Option 3: "{option_3}"
Option 4: "{option_4}"
Option 5: "{option_5}"

To answer this question, you should exactly mention one option, \
so announce the option you think is the best one in the format: \
'Option 1' or 'Option 2' or 'Option 3' or 'Option 4' or 'Option 5':
"""
# Drop the leading space and the trailing newline left by the literal layout.
template = template.strip()

# Wrap the raw template so the chain can fill in the question and its five
# options by name.
prompt = PromptTemplate(
    input_variables=["question"] + [f"option_{idx}" for idx in range(1, 6)],
    template=template,
)

In [None]:
# Prompt for a second-stage decision between three candidate options (each
# with a supporting context) and a fixed "None of the above options." choice.
# The placeholder names contain spaces ("option 1", "context 1"), so values
# must be supplied through a dict, e.g. .format(**{"option 1": ...}).
# A trailing backslash joins a line with the next one, so the instruction
# paragraph reaches the model as a single line.
decision_template = """ \
You are given a riddle and four options to choose the answer amongst them. \
The fourth option is "None of the above options". \
Your final task is choosing the best option that is related to the riddle. \
For the first three options, you are given a context that explains a path between the question and the answer. \
Although these contexts may try to say their option is true, you should compare all the options based on the question \
and options' context to choose the one that has the most logical answer. If none of them seem logical, \
choose the fourth option: "None of the above options." \
Now, consider the riddle below and the context provided for you, and tell me which option is \
the best answer to the riddle due to the context.

Riddle: "{question}"

Options:
Option 1: "{option 1}"
Option 2: "{option 2}"
Option 3: "{option 3}"
Option 4: "None of the above options."

Contexts:
Context about option 1: "{context 1}"
Context about option 2: "{context 2}"
Context about option 3: "{context 3}"

To answer this riddle, you should exactly mention one option, \
so announce the option you think is the best one in the format: 'Option 1' or 'Option 2' or 'Option 3' or 'Option 4':
"""

In [None]:
import numpy as np

def extract_csqa_answer(result: str):
    """Pick the option the model settled on from its free-text reply.

    The reply is searched for "Option 1" .. "Option 5"; the option whose
    last mention occurs latest in the text is taken as the final answer.

    Args:
        result: Raw text generated by the model.

    Returns:
        A ``(letter, index)`` tuple where ``letter`` is one of "A".."E" and
        ``index`` is the matching zero-based option index. When the reply
        mentions no option at all, ``(None, -1)`` is returned so that an
        unparseable reply is never counted as answer "A".
    """
    letters = ("A", "B", "C", "D", "E")
    # rfind gives the start of the last mention, or -1 when absent.
    last_seen = [result.rfind(f"Option {number}") for number in range(1, len(letters) + 1)]

    latest = max(last_seen)
    if latest < 0:
        # No "Option N" anywhere in the reply: report "no answer".
        return None, -1

    answer = last_seen.index(latest)
    return letters[answer], answer

# Prepare LLM

In [None]:
# Checkpoint identifier passed to both the model and tokenizer
# `from_pretrained` calls below.
model_id = "HuggingFaceH4/zephyr-7b-beta"

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
# The tokenizer and the weights are loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the weights in bfloat16 and let `device_map="auto"` decide placement.
_model_load_kwargs = dict(device_map="auto", torch_dtype=torch.bfloat16)
model = AutoModelForCausalLM.from_pretrained(model_id, **_model_load_kwargs)

In [None]:
# Import the factory under an alias: the result is stored in a variable named
# `pipeline` (later cells read it under that name), which would otherwise
# overwrite the imported factory function and make this cell fail on re-run.
from transformers import pipeline as hf_pipeline

# Text-generation pipeline around the already-loaded model and tokenizer.
pipeline = hf_pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    use_cache=True,
    device_map="auto",
    max_length=4000,
    # Sampling is enabled and restricted to the 5 most likely tokens.
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    # The EOS token id doubles as the padding token id.
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
from langchain import HuggingFacePipeline

# Expose the transformers pipeline through LangChain's LLM interface.
# NOTE(review): the pipeline is built with do_sample=True / top_k=5 while a
# temperature of 0.0 is requested here -- confirm whether this override
# actually reaches generation and which behaviour is intended.
llm = HuggingFacePipeline(
    model_kwargs={'temperature': 0.0},
    pipeline=pipeline,
)

In [None]:
# Chain that fills the CSQA prompt and sends it to the wrapped model.
llm_chain = LLMChain(
    llm=llm,
    prompt=prompt,
)

# Inference Utils

In [None]:
import json

def _write_jsonl(data, address, mode):
    """Write each item of ``data`` as one JSON line to ``address`` using ``mode``."""
    import json  # local import keeps the helper self-contained

    # json.dumps escapes non-ASCII by default; the explicit encoding simply
    # makes the file independent of the platform's default encoding.
    with open(address, mode, encoding="utf-8") as jsonl_file:
        jsonl_file.writelines(json.dumps(item) + '\n' for item in data)


def save_inference(data, address):
    """Write ``data`` to ``address`` as JSON Lines, replacing any existing file.

    Args:
        data: Iterable of JSON-serialisable records.
        address: Path of the ``.jsonl`` file to (over)write.
    """
    _write_jsonl(data, address, 'w')


def add_inference(data, address):
    """Append ``data`` to ``address`` as JSON Lines, creating the file if needed.

    Args:
        data: Iterable of JSON-serialisable records.
        address: Path of the ``.jsonl`` file to append to.
    """
    _write_jsonl(data, address, 'a')

# Execute Inference

In [None]:
from tqdm.notebook import tqdm

results = []
score = 0

# Materialise the examples so that the progress-bar total and the accuracy
# denominator come from the data instead of a hard-coded sample count.
examples = list(itr)
total = len(examples)

for que, options, key in tqdm(examples, total=total, desc="Inference (CSQA)"):
    # Prompt inputs: the question plus option_1 .. option_N.
    data = {"question": que}
    for i, opt in enumerate(options, start=1):
        data[f"option_{i}"] = opt

    result = llm_chain.run(data).strip()
    pred_key, pred = extract_csqa_answer(result)

    # Store the raw reply, the parsed prediction and the gold answer together.
    data["zephyr_raw"] = result
    data["zephyr_pred"] = str(pred)
    data["zephyr_pred_key"] = pred_key
    data["answer"] = str(key)
    data["score"] = pred_key == key

    # Append right away so finished examples survive an interrupted run.
    add_inference([data], OUTPUT_DATA_PATH)
    results.append(data)

    if pred_key == key:
        score += 1
        # Correct answers so far, as a percentage of all examples in the run.
        print(f"CSQA Score: {round(100 * score / total, 3)}%")

# Write the complete result list once more as a backup copy.
save_inference(results, OUTPUT_BACKUP_PATH)

print(f"Dumped {len(results)} records to {OUTPUT_DATA_PATH}")