[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/gsarti/ik-nlp-tutorials/blob/main/notebooks/W6T_Advanced_Prompting_Generation.ipynb)

In [None]:
# Notebook-only cell: the leading "!" runs pip as a shell command to install
# the listed packages. Run it once before executing the cells below.
!pip install torch transformers bitsandbytes accelerate rank_bm25 outlines datasets



In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Model checkpoint identifier handed to both `from_pretrained` calls below.
checkpoint = "Qwen/Qwen2-0.5b-instruct"

# Ask for 8-bit quantized weights; this keeps VRAM usage low, which matters
# because little GPU memory is available.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Keyword arguments for the model load: apply the quantization config and
# place the model on the GPU.
model_load_kwargs = {
    "quantization_config": bnb_config,
    "device_map": "cuda",
}
model = AutoModelForCausalLM.from_pretrained(checkpoint, **model_load_kwargs)

In [None]:
# Load the three SciQ splits (train / validation / test) as pandas DataFrames.
import pandas as pd

sciq_root = "hf://datasets/allenai/sciq/"

# Relative parquet path of every split, keyed by split name.
splits = {
    split_name: f"data/{split_name}-00000-of-00001.parquet"
    for split_name in ("train", "validation", "test")
}

df_train, df_val, df_test = (
    pd.read_parquet(sciq_root + splits[split_name])
    for split_name in ("train", "validation", "test")
)

# Stack the train and validation rows into a single frame with a fresh
# consecutive index. The test split is loaded above but is not part of df_all.
df_all = pd.concat([df_train, df_val], ignore_index=True)


In [None]:
# Prompting experiments over every row of df_all. Each experiment name becomes
# a new df_all column holding the first character of the last line of the
# decoded model output for that row ("" when that line is empty).
#   baseline_answer*            -> system prompt only
#   question_context*           -> system prompt + the row's own support text
#   previous_questions_context* -> as above + support text of the two
#                                  preceding rows
# Names containing "_A" list the correct answer as option A; all other names
# list it as option D.
experiments = [
    "baseline_answer",
    "question_context",
    "previous_questions_context",
    "baseline_answer_A",
    "question_context_A",
    "previous_questions_context_A",
]
original_context = "You are a QA system that only answers with a singular letter as an answer"

for experiment in experiments:
    # Restart the history for every experiment. Otherwise the first rows of a
    # pass would receive support text left over from the end of the previous
    # pass (or fail on None if a "previous" experiment ran first).
    prev_row = None
    prev2_row = None

    for idx, row in df_all.iterrows():
        if "_A" in experiment:
            to_ask = (
                row["question"]
                + " A " + row["correct_answer"]
                + ", B " + row["distractor2"]
                + ", C " + row["distractor1"]
                + ", D " + row["distractor3"]
            )
        else:
            to_ask = (
                row["question"]
                + " A " + row["distractor3"]
                + ", B " + row["distractor2"]
                + ", C " + row["distractor1"]
                + ", D " + row["correct_answer"]
            )

        # "n_context" matches "question_context*" only;
        # "previous_questions_context*" contains "s_context" instead and is
        # handled by the next branch.
        # NOTE: support text is appended to the system prompt without any
        # separator; this is kept unchanged so prompts stay comparable.
        if "n_context" in experiment:
            context = original_context + row["support"]
        elif "previous" in experiment:
            # Rows without a predecessor contribute no extra support text.
            prev_support = prev_row["support"] if prev_row is not None else ""
            prev2_support = prev2_row["support"] if prev2_row is not None else ""
            context = original_context + row["support"] + prev_support + prev2_support
        else:
            context = original_context

        prev2_row = prev_row
        prev_row = row

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": to_ask},
        ]
        tokenized_chat = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to("cuda")
        outputs = model.generate(tokenized_chat, max_new_tokens=128)

        # The decoded text contains the prompt as well, so the answer is read
        # from the last line. Guard against an empty last line instead of
        # raising IndexError in the middle of a long run.
        output_lines = tokenizer.decode(outputs[0]).splitlines()
        if output_lines and output_lines[-1]:
            answer = output_lines[-1][0]
        else:
            answer = ""
        df_all.loc[idx, experiment] = answer

In [None]:
# Wrap df_all (now carrying one answer column per experiment) in a DataFrame
# and write it out as CSV.
results_path = "qwen_small.csv"
df = pd.DataFrame(data=df_all)
df.to_csv(results_path)


In [None]:
# EXTRA EXPERIMENTS AFTER EVALUATION!!!
#
# Each experiment name becomes a new df_all column holding the first character
# of the last line of the decoded model output ("" when that line is empty).
#   harder_context*  -> system prompt + the row's support + the first 50
#                       support texts read from harder_context.csv
#   position_middle  -> the row's support placed between two earlier supports
#   position_end     -> the row's support placed after two earlier supports
# Names containing "_A" list the correct answer as option A; all other names
# list it as option D.

experiments = ["harder_context", "harder_context_A", "position_middle", "position_end"]
original_context = "You are a QA system that only answers with a singular letter as an answer"
harder_context = pd.read_csv("harder_context.csv")[:50]

# The concatenated distractor supports do not depend on the row, so build the
# string once instead of on every iteration.
harder_support = "".join(str(support) for support in harder_context["support"].tolist())

# One (empty) entry is appended for every row where no answer letter could be
# extracted, so len(tf) counts such rows.
tf = []

for experiment in experiments:
    # Support texts of the two preceding rows (oldest first). Defined here so
    # the cell runs on its own, and reset per experiment so that no history
    # leaks from one pass into the next.
    prev_rows = ["", ""]

    for idx, row in df_all.iterrows():
        if "_A" in experiment:
            to_ask = (
                row["question"]
                + " A " + row["correct_answer"]
                + ", B " + row["distractor2"]
                + ", C " + row["distractor1"]
                + ", D " + row["distractor3"]
            )
        else:
            to_ask = (
                row["question"]
                + " A " + row["distractor3"]
                + ", B " + row["distractor2"]
                + ", C " + row["distractor1"]
                + ", D " + row["correct_answer"]
            )

        if "harder_" in experiment:
            context = original_context + row["support"] + harder_support
        elif "_middle" in experiment:
            context = original_context + prev_rows[0] + row["support"] + prev_rows[1]
        elif "_end" in experiment:
            context = original_context + prev_rows[0] + prev_rows[1] + row["support"]
        else:
            # Unknown experiment name: fall back to the bare system prompt
            # rather than reusing a stale context from an earlier iteration.
            context = original_context

        # Slide the two-row history window forward.
        prev_rows.pop(0)
        prev_rows.append(row["support"])

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": to_ask},
        ]

        tokenized_chat = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to("cuda")
        outputs = model.generate(tokenized_chat, max_new_tokens=128)

        decoded_output = tokenizer.decode(outputs[0])
        output_lines = decoded_output.splitlines()

        # An empty decode yields no lines at all; treat that like an empty
        # last line instead of indexing into an empty list.
        last_line = output_lines[-1] if output_lines else ""

        if last_line:
            answer = last_line[0]
            df_all.loc[idx, experiment] = answer
        else:
            df_all.loc[idx, experiment] = ""
            tf.append(last_line)

In [None]:
# Wrap df_all (including the extra experiment columns written so far) in a
# DataFrame and write it out as CSV.
results_path = "extra_qwen_small1.csv"
df = pd.DataFrame(data=df_all)
df.to_csv(results_path)


In [None]:
# EXTRA EXPERIMENTS AFTER EVALUATION!!!
#
# Each experiment name becomes a new df_all column holding the first character
# of the last line of the decoded model output ("" when that line is empty).
#   position_middle_A -> the row's support placed between two earlier supports
#   position_end_A    -> the row's support placed after two earlier supports
#   prev_20*          -> the row's support followed by the supports of the 20
#                        preceding rows
# Names containing "_A" list the correct answer as option A; all other names
# list it as option D.

experiments = ["position_middle_A", "position_end_A", "prev_20", "prev_20_A"]
original_context = "You are a QA system that only answers with a singular letter as an answer"

# Number of preceding support texts kept in the sliding window.
history_size = 20

# One (empty) entry is appended for every row where no answer letter could be
# extracted, so len(tf) counts such rows.
tf = []

for experiment in experiments:
    # Sliding window over the supports of the preceding rows, oldest first.
    # Reset per experiment so that no history leaks from one pass into the next.
    prev_rows = [""] * history_size

    for idx, row in df_all.iterrows():
        if "_A" in experiment:
            to_ask = (
                row["question"]
                + " A " + row["correct_answer"]
                + ", B " + row["distractor2"]
                + ", C " + row["distractor1"]
                + ", D " + row["distractor3"]
            )
        else:
            to_ask = (
                row["question"]
                + " A " + row["distractor3"]
                + ", B " + row["distractor2"]
                + ", C " + row["distractor1"]
                + ", D " + row["correct_answer"]
            )

        # NOTE(review): with a 20-row window, prev_rows[0] and prev_rows[1]
        # are the two OLDEST entries (20 and 19 rows back), not the two rows
        # immediately before the current one - confirm this is intended.
        if "_middle" in experiment:
            context = original_context + prev_rows[0] + row["support"] + prev_rows[1]
        elif "_end" in experiment:
            context = original_context + prev_rows[0] + prev_rows[1] + row["support"]
        else:
            # prev_20*: the row's own support followed by the whole window,
            # oldest entry first.
            context = original_context + row["support"] + "".join(prev_rows)

        # Slide the history window forward.
        prev_rows.pop(0)
        prev_rows.append(row["support"])

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": to_ask},
        ]

        tokenized_chat = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to("cuda")
        outputs = model.generate(tokenized_chat, max_new_tokens=128)

        decoded_output = tokenizer.decode(outputs[0])
        output_lines = decoded_output.splitlines()

        # An empty decode yields no lines at all; treat that like an empty
        # last line instead of indexing into an empty list.
        last_line = output_lines[-1] if output_lines else ""

        if last_line:
            answer = last_line[0]
            df_all.loc[idx, experiment] = answer
        else:
            df_all.loc[idx, experiment] = ""
            tf.append(last_line)

In [None]:
# Wrap df_all (including all extra experiment columns) in a DataFrame and
# write it out as CSV.
results_path = "extra_qwen_small2.csv"
df = pd.DataFrame(data=df_all)
df.to_csv(results_path)
