[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/gsarti/ik-nlp-tutorials/blob/main/notebooks/W6T_Advanced_Prompting_Generation.ipynb)

In [1]:
# Run in Notebook to install local packages
!pip install torch transformers bitsandbytes accelerate rank_bm25 outlines datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Collecting outlines
  Downloading outlines-1.0.2-py3-none-any.whl.metadata (27 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-man

In [18]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

checkpoint = "Qwen/Qwen2.5-1.5B-Instruct"

# Load the weights in 8-bit precision to keep GPU memory use low.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# Tokenizer and model come from the same checkpoint; the model is placed on the GPU.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="cuda",
    quantization_config=bnb_config,
)






In [19]:
# Load the SciQ dataset from the Hugging Face Hub (one parquet file per split).
import pandas as pd

splits = {
    'train': 'data/train-00000-of-00001.parquet',
    'validation': 'data/validation-00000-of-00001.parquet',
    'test': 'data/test-00000-of-00001.parquet',
}
df_train, df_val = (
    pd.read_parquet("hf://datasets/allenai/sciq/" + splits[split_name])
    for split_name in ("train", "validation")
)

# Stack train and validation into one frame with a fresh 0..n-1 index
# (the test split is listed above but not loaded here).
df_all = pd.concat([df_train, df_val], ignore_index=True)


In [None]:
# Bucket the rows of `df_all` by the character length of their "support" passage:
#   small  : length < 100   (rows with an empty support string land here too)
#   medium : 100 <= length < 500
#   long   : length >= 500
# The boundaries are contiguous: a passage of exactly 100 characters is medium
# (previously it matched neither the small nor the medium test and fell through
# to the long bucket).
# Each bucket keeps at most its first 1000 matching rows, in `df_all` order and
# with the original `df_all` index labels.
support_length = df_all["support"].map(len)

is_small = support_length < 100
is_medium = (support_length >= 100) & (support_length < 500)
is_long = support_length >= 500

# `.copy()` gives each bucket its own data so later cells can add answer columns.
small_df = df_all[is_small].head(1000).copy()
medium_df = df_all[is_medium].head(1000).copy()
long_df = df_all[is_long].head(1000).copy()

# Total number of rows matching each bucket (not capped at 1000).
small = int(is_small.sum())
medium = int(is_medium.sum())
longg = int(is_long.sum())


In [None]:
# Context-length experiments.
# For every length bucket the model answers each multiple-choice question twice:
#   "<bucket>"   : correct answer listed as option D
#   "<bucket>_a" : correct answer listed as option A
# The first character of the last line of the decoded output is stored as the
# answer in a column named after the experiment, and the bucket frame is written
# to CSV after each experiment.
experiments = ["long", "long_a", "medium","medium_a", "small", "small_a"]
original_context = "You are a QA system that only answers with a singular letter as an answer"

# Bucket evaluated by each experiment (the "_a" variant shares its bucket's frame).
bucket_for_experiment = {
    "long": long_df,
    "long_a": long_df,
    "medium": medium_df,
    "medium_a": medium_df,
    "small": small_df,
    "small_a": small_df,
}

# NOTE(review): generation below uses the checkpoint's default generation
# settings; if those enable sampling, answers vary between runs — confirm.
for experiment in experiments:
    used_df = bucket_for_experiment[experiment]
    # The experiment names are lowercase, so the variant marker is "_a"
    # (a test for "_A" never matched and both variants used the same order).
    correct_first = experiment.endswith("_a")

    for idx, row in used_df.iterrows():
        if correct_first:
            to_ask = (
                row["question"] +
                " A " + row["correct_answer"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["distractor3"]
            )
        else:
            to_ask = (
                row["question"] +
                " A " + row["distractor3"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["correct_answer"]
            )

        # NOTE(review): the instruction and the support passage are joined
        # without a separator; left as-is to keep the prompt unchanged.
        context = original_context + row["support"]

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": to_ask},
        ]
        tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
        outputs = model.generate(tokenized_chat, max_new_tokens=128)

        # Store "" instead of raising IndexError when the decoded text has no
        # usable last line.
        output_lines = tokenizer.decode(outputs[0]).splitlines()
        if output_lines and output_lines[-1]:
            answer = output_lines[-1][0]
        else:
            answer = ""
        used_df.loc[idx, experiment] = answer

    # The frame is shared by both variants of a bucket, so the "_a" file also
    # contains the baseline column written earlier.
    df = pd.DataFrame(used_df)
    df.to_csv("additional_med_" + experiment + ".csv")

In [None]:
# Distractor-context experiments.
# The system prompt holds the instruction, the question's own support passage,
# and the "support" texts of the first 2, 20 or 50 rows of harder_context.csv.
# Names containing "_A" list the correct answer as option A; the others list it
# as option D. The first character of the last decoded line is stored per row
# in a column named after the experiment.
# (A missing comma previously fused "harder_context_20_A" and
# "harder_context_50" into a single bogus experiment name.)
experiments = ["harder_context_2", "harder_context_A_2", "harder_context_20", "harder_context_20_A", "harder_context_50", "harder_context_50_A"]
original_context = "You are a QA system that only answers with a singular letter as an answer"

# Read the distractor file once and slice it for each experiment size.
harder_context = pd.read_csv("harder_context.csv")
harder_context_2 = harder_context[:2]
harder_context_20 = harder_context[:20]
harder_context_50 = harder_context[:50]

# First 1000 questions of train + validation; copied so answer columns can be added.
df_all = pd.concat([df_train, df_val], ignore_index=True).iloc[:1000].copy()
tf = []  # one entry per generation that produced no usable last line

# Distractor rows appended to the system prompt for each experiment.
distractors_for_experiment = {
    "harder_context_2": harder_context_2,
    "harder_context_A_2": harder_context_2,
    "harder_context_20": harder_context_20,
    "harder_context_20_A": harder_context_20,
    "harder_context_50": harder_context_50,
    "harder_context_50_A": harder_context_50,
}

for experiment in experiments:
    correct_first = "_A" in experiment
    # The distractor text is the same for every question of an experiment,
    # so build it once per experiment instead of once per row.
    distractor_text = "".join(
        str(support) for support in distractors_for_experiment[experiment]["support"].tolist()
    )

    for idx, row in df_all.iterrows():
        if correct_first:
            to_ask = (
                row["question"] +
                " A " + row["correct_answer"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["distractor3"]
            )
        else:
            to_ask = (
                row["question"] +
                " A " + row["distractor3"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["correct_answer"]
            )

        context = original_context + row["support"] + distractor_text

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": to_ask},
        ]

        tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
        outputs = model.generate(tokenized_chat, max_new_tokens=128)

        decoded_output = tokenizer.decode(outputs[0])
        output_lines = decoded_output.splitlines()

        if output_lines and output_lines[-1]:
            answer = output_lines[-1][0]
            df_all.loc[idx, experiment] = answer
        else:
            df_all.loc[idx, experiment] = ""
            # `output_lines` is empty when the decoded text is empty, so guard
            # the lookup instead of raising IndexError.
            tf.append(output_lines[-1] if output_lines else "")

In [None]:
# Write the current `df_all` frame, including any answer columns added to it, to CSV.
df = pd.DataFrame(data=df_all)
df.to_csv(path_or_buf="additional_med_harder.csv")

In [20]:
# Support-placement experiments.
#   "original_support"       : support passage prepended to the user message
#   "sys_original_support"   : user message holds only the question and options
#   "..._a"                  : correct answer listed as option A instead of D
# The first character of the last decoded line is stored per row in a column
# named after the experiment.
experiments = ["original_support", "original_support_a", "sys_original_support", "sys_original_support_a"]
original_context = "You are a QA system that only answers with a singular letter as an answer"

# First 1000 questions of train + validation; copied so answer columns can be added.
df_all = pd.concat([df_train, df_val], ignore_index=True).iloc[:1000].copy()
# Not read anywhere in this cell; kept in case another cell uses it.
prev_rows = ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
tf = []  # one entry per generation that produced no usable last line

for experiment in experiments:
    support_in_user_message = "sys" not in experiment
    correct_first = experiment.endswith("_a")

    for idx, row in df_all.iterrows():
        if correct_first:
            options = (
                " A " + row["correct_answer"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["distractor3"]
            )
        else:
            options = (
                " A " + row["distractor3"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["correct_answer"]
            )

        to_ask = row["question"] + options
        if support_in_user_message:
            to_ask = row["support"] + to_ask

        # NOTE(review): the support passage is added to the system prompt for
        # every experiment, so the non-"sys" runs see it twice (system and
        # user message) — confirm this is intended.
        context = original_context + row["support"]

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": to_ask},
        ]

        tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
        outputs = model.generate(tokenized_chat, max_new_tokens=128)

        decoded_output = tokenizer.decode(outputs[0])
        output_lines = decoded_output.splitlines()

        if output_lines and output_lines[-1]:
            answer = output_lines[-1][0]
            df_all.loc[idx, experiment] = answer
        else:
            df_all.loc[idx, experiment] = ""
            # `output_lines` is empty when the decoded text is empty, so guard
            # the lookup instead of raising IndexError.
            tf.append(output_lines[-1] if output_lines else "")

original_support 0
original_support 1
original_support 2
original_support 3
original_support 4
original_support 5
original_support 6
original_support 7
original_support 8
original_support 9
original_support 10
original_support 11
original_support 12
original_support 13
original_support 14
original_support 15
original_support 16
original_support 17
original_support 18
original_support 19
original_support 20
original_support 21
original_support 22
original_support 23
original_support 24
original_support 25
original_support 26
original_support 27
original_support 28
original_support 29
original_support 30
original_support 31
original_support 32
original_support 33
original_support 34
original_support 35
original_support 36
original_support 37
original_support 38
original_support 39
original_support 40
original_support 41
original_support 42
original_support 43
original_support 44
original_support 45
original_support 46
original_support 47
original_support 48
original_support 49
original_s

In [21]:
# Write the current `df_all` frame, including any answer columns added to it, to CSV.
df = pd.DataFrame(data=df_all)
df.to_csv(path_or_buf="sys_med_support.csv")

In [22]:
checkpoint = "Qwen/Qwen2-0.5b-instruct"

# Load the weights in 8-bit precision to keep GPU memory use low.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# Rebinds `tokenizer` and `model` to the new checkpoint; the model is placed on the GPU.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map="cuda",
    quantization_config=bnb_config,
)

In [None]:
# Context-length experiments (run with whichever model/tokenizer is currently loaded).
# For every length bucket the model answers each multiple-choice question twice:
#   "<bucket>"   : correct answer listed as option D
#   "<bucket>_a" : correct answer listed as option A
# The first character of the last line of the decoded output is stored as the
# answer in a column named after the experiment, and the bucket frame is written
# to CSV after each experiment.
experiments = ["long", "long_a", "medium","medium_a", "small", "small_a"]
original_context = "You are a QA system that only answers with a singular letter as an answer"

# Bucket evaluated by each experiment (the "_a" variant shares its bucket's frame).
bucket_for_experiment = {
    "long": long_df,
    "long_a": long_df,
    "medium": medium_df,
    "medium_a": medium_df,
    "small": small_df,
    "small_a": small_df,
}

# NOTE(review): generation below uses the checkpoint's default generation
# settings; if those enable sampling, answers vary between runs — confirm.
for experiment in experiments:
    used_df = bucket_for_experiment[experiment]
    # The experiment names are lowercase, so the variant marker is "_a"
    # (a test for "_A" never matched and both variants used the same order).
    correct_first = experiment.endswith("_a")

    for idx, row in used_df.iterrows():
        if correct_first:
            to_ask = (
                row["question"] +
                " A " + row["correct_answer"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["distractor3"]
            )
        else:
            to_ask = (
                row["question"] +
                " A " + row["distractor3"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["correct_answer"]
            )

        # NOTE(review): the instruction and the support passage are joined
        # without a separator; left as-is to keep the prompt unchanged.
        context = original_context + row["support"]

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": to_ask},
        ]
        tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
        outputs = model.generate(tokenized_chat, max_new_tokens=128)

        # Store "" instead of raising IndexError when the decoded text has no
        # usable last line.
        output_lines = tokenizer.decode(outputs[0]).splitlines()
        if output_lines and output_lines[-1]:
            answer = output_lines[-1][0]
        else:
            answer = ""
        used_df.loc[idx, experiment] = answer

    # The frame is shared by both variants of a bucket, so the "_a" file also
    # contains the baseline column written earlier.
    df = pd.DataFrame(used_df)
    df.to_csv("additional_small_" + experiment + ".csv")

In [None]:
# Distractor-context experiments (run with whichever model/tokenizer is currently loaded).
# The system prompt holds the instruction, the question's own support passage,
# and the "support" texts of the first 2, 20 or 50 rows of harder_context.csv.
# Names containing "_A" list the correct answer as option A; the others list it
# as option D. The first character of the last decoded line is stored per row
# in a column named after the experiment.
# (The list previously ended in a stray "]]", which is a SyntaxError.)
experiments = ["harder_context_2", "harder_context_A_2", "harder_context_20", "harder_context_20_A", "harder_context_50", "harder_context_50_A"]
original_context = "You are a QA system that only answers with a singular letter as an answer"

# Read the distractor file once and slice it for each experiment size.
harder_context = pd.read_csv("harder_context.csv")
harder_context_2 = harder_context[:2]
harder_context_20 = harder_context[:20]
harder_context_50 = harder_context[:50]

# First 1000 questions of train + validation; copied so answer columns can be added.
df_all = pd.concat([df_train, df_val], ignore_index=True).iloc[:1000].copy()
tf = []  # one entry per generation that produced no usable last line

# Distractor rows appended to the system prompt for each experiment.
distractors_for_experiment = {
    "harder_context_2": harder_context_2,
    "harder_context_A_2": harder_context_2,
    "harder_context_20": harder_context_20,
    "harder_context_20_A": harder_context_20,
    "harder_context_50": harder_context_50,
    "harder_context_50_A": harder_context_50,
}

for experiment in experiments:
    correct_first = "_A" in experiment
    # The distractor text is the same for every question of an experiment,
    # so build it once per experiment instead of once per row.
    distractor_text = "".join(
        str(support) for support in distractors_for_experiment[experiment]["support"].tolist()
    )

    for idx, row in df_all.iterrows():
        if correct_first:
            to_ask = (
                row["question"] +
                " A " + row["correct_answer"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["distractor3"]
            )
        else:
            to_ask = (
                row["question"] +
                " A " + row["distractor3"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["correct_answer"]
            )

        context = original_context + row["support"] + distractor_text

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": to_ask},
        ]

        tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
        outputs = model.generate(tokenized_chat, max_new_tokens=128)

        decoded_output = tokenizer.decode(outputs[0])
        output_lines = decoded_output.splitlines()

        if output_lines and output_lines[-1]:
            answer = output_lines[-1][0]
            df_all.loc[idx, experiment] = answer
        else:
            df_all.loc[idx, experiment] = ""
            # `output_lines` is empty when the decoded text is empty, so guard
            # the lookup instead of raising IndexError.
            tf.append(output_lines[-1] if output_lines else "")

In [None]:
# Write the current `df_all` frame, including any answer columns added to it, to CSV.
df = pd.DataFrame(data=df_all)
df.to_csv(path_or_buf="additional_small_harder.csv")

In [23]:
# Support-placement experiments (run with whichever model/tokenizer is currently loaded).
#   "original_support"       : support passage prepended to the user message
#   "sys_original_support"   : user message holds only the question and options
#   "..._a"                  : correct answer listed as option A instead of D
# The first character of the last decoded line is stored per row in a column
# named after the experiment.
experiments = ["original_support", "original_support_a", "sys_original_support", "sys_original_support_a"]
original_context = "You are a QA system that only answers with a singular letter as an answer"

# First 1000 questions of train + validation; copied so answer columns can be added.
df_all = pd.concat([df_train, df_val], ignore_index=True).iloc[:1000].copy()
# Not read anywhere in this cell; kept in case another cell uses it.
prev_rows = ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
tf = []  # one entry per generation that produced no usable last line

for experiment in experiments:
    support_in_user_message = "sys" not in experiment
    correct_first = experiment.endswith("_a")

    for idx, row in df_all.iterrows():
        if correct_first:
            options = (
                " A " + row["correct_answer"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["distractor3"]
            )
        else:
            options = (
                " A " + row["distractor3"] +
                ", B " + row["distractor2"] +
                ", C " + row["distractor1"] +
                ", D " + row["correct_answer"]
            )

        to_ask = row["question"] + options
        if support_in_user_message:
            to_ask = row["support"] + to_ask

        # NOTE(review): the support passage is added to the system prompt for
        # every experiment, so the non-"sys" runs see it twice (system and
        # user message) — confirm this is intended.
        context = original_context + row["support"]

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": to_ask},
        ]

        tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
        outputs = model.generate(tokenized_chat, max_new_tokens=128)

        decoded_output = tokenizer.decode(outputs[0])
        output_lines = decoded_output.splitlines()

        if output_lines and output_lines[-1]:
            answer = output_lines[-1][0]
            df_all.loc[idx, experiment] = answer
        else:
            df_all.loc[idx, experiment] = ""
            # `output_lines` is empty when the decoded text is empty, so guard
            # the lookup instead of raising IndexError.
            tf.append(output_lines[-1] if output_lines else "")

original_support 0
original_support 1
original_support 2
original_support 3
original_support 4
original_support 5
original_support 6
original_support 7
original_support 8
original_support 9
original_support 10
original_support 11
original_support 12
original_support 13
original_support 14
original_support 15
original_support 16
original_support 17
original_support 18
original_support 19
original_support 20
original_support 21
original_support 22
original_support 23
original_support 24
original_support 25
original_support 26
original_support 27
original_support 28
original_support 29
original_support 30
original_support 31
original_support 32
original_support 33
original_support 34
original_support 35
original_support 36
original_support 37
original_support 38
original_support 39
original_support 40
original_support 41
original_support 42
original_support 43
original_support 44
original_support 45
original_support 46
original_support 47
original_support 48
original_support 49
original_s

In [24]:
# Write the current `df_all` frame, including any answer columns added to it, to CSV.
df = pd.DataFrame(data=df_all)
df.to_csv(path_or_buf="sys_small_support.csv")