# Flan-T5 (base) on SQuAD v2

## Dataset

In [1]:
from datasets import load_dataset

# Load SQuAD v2; the returned object is indexed by split name ("train", "validation").
ds = load_dataset(path="squad_v2")

# Bare name as the last expression so the notebook displays the split summary.
ds

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 130319
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 11873
    })
})

In [2]:
# Short aliases for the two splits; `val` is the one the evaluation cells read from.
trn, val = ds["train"], ds["validation"]

## Model

In [3]:
import torch

# Prefer the first CUDA device when one is visible to torch; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Show which device was picked.
device

'cuda:0'

In [6]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint identifier shared by the tokenizer and the model weights.
model_id = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the seq2seq model first, then move it to the selected device.
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
model = model.to(device)

In [7]:
def prompt(context: str, question: str) -> str:
    """Build the question-answering prompt for one (context, question) pair.

    The template tells the model to answer from the context only and to say
    'I don't know' otherwise. The question is placed before the context and
    the text ends with an open "Answer:" cue.

    Args:
        context: Passage the answer must come from.
        question: Question to ask about the passage.

    Returns:
        The filled-in prompt text.
    """
    # Every line after the first carries a 12-space indent (and the first line
    # a trailing space); both are part of the emitted text and are kept as-is.
    pad = " " * 12
    header = (
        "Answer the following Question based on the Context only. \n"
        + pad
        + "Only answer from the Context. If you don't know the answer, say 'I don't know'.\n"
    )
    question_part = pad + f"Question: {question}" + "\n\n\n"
    context_part = pad + f"Context: {context}" + "\n\n\n"
    return header + question_part + context_part + pad + "Answer:"

In [8]:
# One-off smoke test on the first validation row: tokenize -> generate -> decode.
first_example = val[0]
inputs = tokenizer(
    prompt(first_example["context"], first_example["question"]),
    return_tensors='pt',
).to(device)

# Generate up to 200 new tokens and keep only the first returned sequence.
generated_ids = model.generate(inputs["input_ids"], max_new_tokens=200)
output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

2023-11-25 13:51:26.542195: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-25 13:51:27.304791: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-11-25 13:51:27.304885: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64


In [13]:
def generate(prompt: str) -> str:
    """Run the model on one prompt string and return the decoded text.

    Args:
        prompt: Prompt text to tokenize and feed to the model. Inside this
            function the parameter shadows the notebook-level ``prompt`` helper.

    Returns:
        The first generated sequence (at most 200 new tokens), decoded with
        special tokens stripped.
    """
    encoded = tokenizer(prompt, return_tensors='pt').to(device)
    sequences = model.generate(encoded["input_ids"], max_new_tokens=200)
    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [25]:
# Sanity check: query the model with the first validation row and print the
# prediction next to the reference answers.
sample = val[0]
pred = generate(prompt(context=sample["context"], question=sample["question"]))

# Adjacent literals are concatenated into one report string before printing.
report = (
    "\n# Context:\n"
    f"{sample['context']}\n\n\n"
    "# Question:\n"
    f"{sample['question']}\n\n\n"
    "# Model:\n"
    f"{pred}\n\n\n"
    "# Answers:\n\n"
    f"{sample['answers'].get('text')}\n"
)
print(report)


# Context:
The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.


# Question:
In what country is Normandy located?


# Model:
France


# Answers:

['France', 'France', 'France', 'France']



In [29]:
# Negative check: pair the context of the last validation row with the question
# of row 5, so the reply printed as the reference is "I don't know".
mismatched_context = val[-1]["context"]
unrelated_question = val[5]["question"]
pred = generate(prompt(context=mismatched_context, question=unrelated_question))

# Adjacent literals are concatenated into one report string before printing.
report = (
    "\n# Context:\n"
    f"{mismatched_context}\n\n\n"
    "# Question:\n"
    f"{unrelated_question}\n\n\n"
    "# Model:\n"
    f"{pred}\n\n\n"
    "# Answers:\n\n"
    "I don't know\n"
)
print(report)


# Context:
The pound-force has a metric counterpart, less commonly used than the newton: the kilogram-force (kgf) (sometimes kilopond), is the force exerted by standard gravity on one kilogram of mass. The kilogram-force leads to an alternate, but rarely used unit of mass: the metric slug (sometimes mug or hyl) is that mass that accelerates at 1 m·s−2 when subjected to a force of 1 kgf. The kilogram-force is not a part of the modern SI system, and is generally deprecated; however it still sees use for some purposes as expressing aircraft weight, jet thrust, bicycle spoke tension, torque wrench settings and engine output torque. Other arcane units of force include the sthène, which is equivalent to 1000 N, and the kip, which is equivalent to 1000 lbf.


# Question:
Who gave their name to Normandy in the 1000's and 1100's


# Model:
I don't know


# Answers:

I don't know



In [31]:
def evaluate_answer(is_possible: bool, pred: str, answer: str) -> bool:
    """Judge a model prediction with a lenient, normalised substring match.

    Both strings are case-folded and have runs of whitespace collapsed before
    comparison, so differences in capitalisation or spacing alone are not
    counted as errors.

    Args:
        is_possible: True when the question has a gold answer.
        pred: Text produced by the model.
        answer: Gold answer text; ignored when ``is_possible`` is False.

    Returns:
        For answerable questions, True when the normalised gold answer occurs
        inside the normalised prediction. For unanswerable questions, True
        when the prediction contains "don't know".
    """

    def _normalise(text: str) -> str:
        # Case-fold and squeeze every whitespace run down to a single space.
        return " ".join(text.casefold().split())

    pred_norm = _normalise(pred)

    if not is_possible:
        return "don't know" in pred_norm

    answer_norm = _normalise(answer)
    # "" is a substring of every string, so an empty gold answer would score
    # any prediction as correct; treat it as a miss instead.
    return bool(answer_norm) and answer_norm in pred_norm

In [32]:
from itertools import islice

from tqdm import tqdm

# Per-example evaluation records, one dict per scored validation row.
result = []
# Number of validation rows to score (name kept as-is; other cells may read it).
iterrations = 30

# Cap the progress-bar total at the split size so it stays accurate when the
# split holds fewer rows than requested.
n_eval = min(iterrations, len(val))

# Iterate the first n_eval rows directly: each row is fetched once instead of
# being re-indexed for every field.
for example in tqdm(islice(val, n_eval), total=n_eval):
    # dataset
    question = example["question"]
    context = example["context"]
    answer_list = example["answers"].get("text")
    # Rows without any gold answer text are the unanswerable questions.
    is_possible = len(answer_list) > 0
    answer = answer_list[0] if is_possible else ""

    # call model and extract answer
    p = prompt(context=context, question=question)
    pred = generate(p)

    # evaluate answer (local name avoids shadowing the builtin `eval`)
    is_correct = evaluate_answer(is_possible, pred, answer)

    result.append(
        {
            "question": question,
            "pred": pred,
            "eval": is_correct,
            "answer": answer,
            "is_possible": is_possible,
        }
    )

100%|██████████| 30/30 [00:02<00:00, 13.42it/s]


In [33]:
# Display the per-example records (question, pred, eval, answer, is_possible)
# collected by the evaluation loop.
result

[{'question': 'In what country is Normandy located?',
  'pred': 'France',
  'eval': True,
  'answer': 'France',
  'is_possible': True},
 {'question': 'When were the Normans in Normandy?',
  'pred': '10th and 11th centuries',
  'eval': True,
  'answer': '10th and 11th centuries',
  'is_possible': True},
 {'question': 'From which countries did the Norse originate?',
  'pred': 'Denmark, Iceland and Norway',
  'eval': True,
  'answer': 'Denmark, Iceland and Norway',
  'is_possible': True},
 {'question': 'Who was the Norse leader?',
  'pred': 'Rollo',
  'eval': True,
  'answer': 'Rollo',
  'is_possible': True},
 {'question': 'What century did the Normans first gain their separate identity?',
  'pred': '10th',
  'eval': False,
  'answer': '10th century',
  'is_possible': True},
 {'question': "Who gave their name to Normandy in the 1000's and 1100's",
  'pred': 'The Normans',
  'eval': False,
  'answer': '',
  'is_possible': False},
 {'question': 'What is France a region of?',
  'pred': 'Fran