In [1]:
!pip install -U transformers --quiet

# Llama 2 7b - Squadv2

## Dataset

In [2]:
import datasets

ds = datasets.load_dataset("squad_v2")
ds

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 130319
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 11873
    })
})

In [3]:
val = ds["validation"]
val

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 11873
})

In [4]:
val[0]["answers"]

{'text': ['France', 'France', 'France', 'France'],
 'answer_start': [159, 159, 159, 159]}

## Model

In [5]:
import torch

device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

In [6]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [7]:
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "meta-llama/Llama-2-7b-chat-hf"

model_config = AutoConfig.from_pretrained(
    pretrained_model_name_or_path=model_id,
    token=True
)

model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', config=model_config)
tokenizer = AutoTokenizer.from_pretrained(model_id)

2023-11-25 12:08:18.373970: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-25 12:08:19.174621: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-11-25 12:08:19.174707: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
)

In [9]:
res = pipeline("How are you?", num_return_sequences=1)
res

[{'generated_text': 'How are you?\n\nAnswer: I\'m doing well, thanks for asking! How about you?\n\nExplanation: This is a common way to respond to someone who has asked how you are doing. It is a polite and friendly way to acknowledge the other person\'s inquiry and show that you are interested in their well-being.\n\nIn this example, the speaker is expressing their well-being using the phrase "I\'m doing well," which is a common way to indicate that everything is going smoothly in their life. The speaker is also showing appreciation for the other person\'s concern by responding with "thanks for asking!"\n\nThis exchange is a good example of how to use polite language and social niceties to build rapport and show respect for the other person\'s feelings and concerns.'}]

In [10]:
res[0].get("generated_text")

'How are you?\n\nAnswer: I\'m doing well, thanks for asking! How about you?\n\nExplanation: This is a common way to respond to someone who has asked how you are doing. It is a polite and friendly way to acknowledge the other person\'s inquiry and show that you are interested in their well-being.\n\nIn this example, the speaker is expressing their well-being using the phrase "I\'m doing well," which is a common way to indicate that everything is going smoothly in their life. The speaker is also showing appreciation for the other person\'s concern by responding with "thanks for asking!"\n\nThis exchange is a good example of how to use polite language and social niceties to build rapport and show respect for the other person\'s feelings and concerns.'

In [11]:
def prompt(context: str, question: str) -> str:
    return f"""Answer the following Question based on the Context only. 
            Only answer from the Context. If you don't know the answer, say 'I don't know'.
            Question: {question}\n\n
            Context: {context}\n\n
            Answer:"""

In [12]:
res = pipeline(prompt(val[-3]["context"], val[0]["question"]),num_return_sequences=1)
res[0].get("generated_text")

"Answer the following Question based on the Context only. \n            Only answer from the Context. If you don't know the answer, say 'I don't know'.\n            Question: In what country is Normandy located?\n\n\n            Context: The pound-force has a metric counterpart, less commonly used than the newton: the kilogram-force (kgf) (sometimes kilopond), is the force exerted by standard gravity on one kilogram of mass. The kilogram-force leads to an alternate, but rarely used unit of mass: the metric slug (sometimes mug or hyl) is that mass that accelerates at 1 m·s−2 when subjected to a force of 1 kgf. The kilogram-force is not a part of the modern SI system, and is generally deprecated; however it still sees use for some purposes as expressing aircraft weight, jet thrust, bicycle spoke tension, torque wrench settings and engine output torque. Other arcane units of force include the sthène, which is equivalent to 1000 N, and the kip, which is equivalent to 1000 lbf.\n\n\n       

In [19]:
from typing import Dict, List

def extract_answer(response: List[Dict[str, str]]) -> str:
    res = response[0].get("generated_text")
    if "Answer:" in res:
        res = res.split("Answer:")[-1]
        res = res.replace("_", "")
        res = res.replace("\n", "")
        res = "" if "don't know" in res else res
    return res

In [20]:
res = pipeline(prompt(val[-3]["context"], val[0]["question"]),num_return_sequences=1)
extract_answer(res)

' '

In [15]:
res = pipeline(prompt(val[0]["context"], val[0]["question"]),num_return_sequences=1)
extract_answer(res)

'Answer the following Question based on the Context only. \n            Only answer from the Context. If you don\'t know the answer, say \'I don\'t know\'.\n            Question: In what country is Normandy located?\n\n\n            Context: The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.\n\n\n          

In [24]:
from tqdm import tqdm

result = []
iterrations = 30

with tqdm(total=iterrations) as pbar:
    for i, _ in enumerate(val):
        if i == iterrations:
            break
            
        # dataset
        question = val[i]["question"]
        context = val[i]["context"]
        answer_list = val[i]["answers"].get("text")
        answer = ""
        is_possible = False
        if len(answer_list) > 0:
            answer = answer_list[0]
            is_possible = True
        
        # call model and extract answer
        res = pipeline(prompt(context=context, question=question), num_return_sequences=1)
        pred = extract_answer(res)
        
        # evaluate answer
        eval = True
        if is_possible and answer not in pred:
            eval = False
            
        result.append(
            {
                "question": question,
                "pred": pred,
                "eval": eval,
                "answer": answer,
                "is_possible": is_possible
            }
        )
        pbar.update(1)

 53%|█████▎    | 16/30 [01:36<01:24,  6.03s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 34.00 MiB (GPU 0; 39.25 GiB total capacity; 37.05 GiB already allocated; 6.88 MiB free; 38.01 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [22]:
result

[{'question': 'In what country is Normandy located?',
  'pred': ' France',
  'eval': False,
  'answer': 'France',
  'is_possible': True},
 {'question': 'When were the Normans in Normandy?',
  'pred': ' ',
  'eval': False,
  'answer': '10th and 11th centuries',
  'is_possible': True},
 {'question': 'From which countries did the Norse originate?',
  'pred': '',
  'eval': False,
  'answer': 'Denmark, Iceland and Norway',
  'is_possible': True},
 {'question': 'Who was the Norse leader?',
  'pred': ' Rollo',
  'eval': False,
  'answer': 'Rollo',
  'is_possible': True},
 {'question': 'What century did the Normans first gain their separate identity?',
  'pred': '',
  'eval': False,
  'answer': '10th century',
  'is_possible': True},
 {'question': "Who gave their name to Normandy in the 1000's and 1100's",
  'pred': '         </div>    </div></div>',
  'eval': False,
  'answer': '',
  'is_possible': False},
 {'question': 'What is France a region of?',
  'pred': '',
  'eval': True,
  'answer': 

In [None]:
preds = list(map(lambda r: r["pred"], result))
ans = list(map(lambda r: r["answer"], result))

In [None]:
import evaluate

rouge = evaluate.load('rouge')

In [54]:
rouge_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)