# Bias Instruction Experiment (LLama)


In this notebook we will collect the output for difference examples using the LLama-3.1-8B-Instruct
</br>
The data used will be the "reduced_sample" that consist in a pool of 4 tasks, containing each 40 examples in the training pool and 10 in the test pool
</br>
The following steps will be followed:
- Load datasets
- Create Dataloaders
- Baseline - Collect outputs
- Baeline - Qualitative Analysis
- Baseline - Performance Comparison


In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

import polars as pl
import pandas as pd
import torch
import numpy as np
import seaborn as sns
import plotly.express as px
from src.utils.experiment_samplers import *
import os
from langchain.prompts import PromptTemplate
import json
from src.llms.ParserLlama import ParserLlama
from langchain.output_parsers.json import SimpleJsonOutputParser


seed = 42
# NumPy
np.random.seed(seed)

# PyTorch
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


if torch.cuda.is_available():
    print(f"Number of GPUs available: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


Number of GPUs available: 1
GPU 0: NVIDIA RTX A5000


In [3]:
parser = SimpleJsonOutputParser()
parser_instruction = "Return a JSON object with an 'answer' key that answers straight forward the user input"

In [4]:
dataset_path = "../../data/bbh_instruction_bias_experiment"

### Default


default_obj = """
Fill the expected Output according to the instruction. Just anwer the input.
Intruction: {instruction}

Examples:
{context}

User Input:
{input}

Model Output:
"""



### Generic 1
generic_1_obj = """
You have to fullffil a specific task, it will be given examples that can or not be related to this task. Just answer the User Input.

Examples:
{context}

User Input:
{input}

Model Output:
"""

## 1. Load Datasets

In [5]:
train = pl.read_ipc(f"{dataset_path}/train.feather")
test = pl.read_ipc(f"{dataset_path}/test.feather")
collections_idx = pl.read_ipc(f"{dataset_path}/collections_idx.feather")
tasks = pl.read_ipc(f"{dataset_path}/tasks.feather")
collections = pl.read_ipc(f"{dataset_path}/collections.feather")
instructions = json.load(open(f"{dataset_path}/instructions.json"))

## 2. Create Datalaoders

In [6]:
## Proportion lists
props = [range(0, 1),  range(7,9)]
collections_dls = []
for p in props:
    collections_dls.append(
        create_colletion_dataloaders(
            df = collections_idx,
            num_tasks = 4,
            proportion = p,
            batch_size = 5,
            shuffle = True
        )
    )

In [7]:
tests_dl = []
for t in tasks["task"].to_list():
    tests_dl.append(
        create_test_dataloader(
            df = test,
            task = t,
            batch_size = 5,
            shuffle = True
        )
    )

## 3. Collect Outputs

In [8]:
templates = [ default_obj, generic_1_obj]

def set_instruction_prompt(template, train, train_idxs, test_idx, test, instructions):




    context = ""
    for idx in train_idxs:
            input = train[idx].select("input").to_numpy()[0]
            output =  train[idx].select("output").to_numpy()[0]
            context += f"Input: {input} \nOutput: {output}\n"

        
    input = test[test_idx]["input"].to_numpy()[0]

    instruction =instructions[test[0].select("task").item()]
    prompt = PromptTemplate.from_template(template).format(instruction=instruction, context=context, input=input)

    return prompt

def set_prompt(template, train, train_idxs, test_idx, test):


    context = ""
    for idx in train_idxs:
            input = train[idx].select("input").to_numpy()[0]
            output =  train[idx].select("output").to_numpy()[0]
            context += f"Input: {input} \nOutput: {output}\n"

        
    input = test[test_idx].select("input")

    prompt = PromptTemplate.from_template(template).format(context=context, input=input)

    return prompt



In [9]:


outputs = {
    "iter": [],
    "template": [],
    "test_idx": [],
    "task": [],
    "proportion": [],
    "output": []
}

llm = ParserLlama()

num_iters = 5
num_tasks = 4
for i in range(num_iters):
    print(f"Iter {i}")	
    for t in range(num_tasks):

        test_dl = tests_dl[t]
        test_idxs = next(iter(test_dl))
        for test_idx in test_idxs[0]:
            test_idx = test_idx[0].item()
            for j in range(len(props)):

                train_dl = collections_dls[j]["task_{}".format(t)]
                collections_idxs = next(iter(train_dl))

                for c in collections_idx[0]:
                    c = c.item()

                    train_idxs = collections[c].select("indices").to_numpy()[0][0].tolist()

                    for t_idx in range(len(templates)):
                        if t_idx ==  0:
                            prompt = set_instruction_prompt(templates[t_idx], train, train_idxs, test_idx, test, instructions)
                        else:
                            prompt = set_prompt(templates[t_idx], train, train_idxs, test_idx, test)
                        output = llm.run(prompt, parser_instruction)
                        try:
                            output = parser.parse(output)["answer"]
                        except:
                            output = ""

                        outputs["iter"].append(i)
                        outputs["template"].append(t_idx)
                        outputs["test_idx"].append(test_idx)
                        outputs["task"].append(t)
                        outputs["proportion"].append(j)
                        outputs["output"].append(output)
                        
                        results = pl.DataFrame(outputs)
                        results.write_ipc(f"{dataset_path}/results/llama_parser.feather")


                
                
                

Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.04s/it]


Iter 0
[{'role': 'system', 'content': "You have to answer the question from the user that will be passed with some examples, not all of them necessarily useful. Return a JSON object with an 'answer' key that answers straight forward the user input"}, {'role': 'user', 'content': 'Question: \nFill the expected Output according to the instruction. Just anwer the input.\nIntruction: given a sentence with an ambigious pronoun, either determine whether the sentence is inherently ambiguous (i.e., the thing that the pronoun refers to cannot be inferred by given information) or, if the pronoun can be implicitly deduced, state the antecedent of the pronoun (i.e., the noun to which the pronoun refers).\n\nExamples:\nInput: [\'Question: Jaymie tells the truth. Millicent says Jaymie lies. Millie says Millicent tells the truth. Vina says Millie lies. Leda says Vina tells the truth. Does Leda tell the truth?\'] \nOutput: [\'Yes\']\nInput: ["In the following sentences, explain the antecedent of the pr

TypeError: unexpected value while building Series of type String; found value of type List(String): [">"]

Hint: Try setting `strict=False` to allow passing data with mixed types.