In [1]:
from llama2 import *
from typing import List, Literal, Optional, Tuple, TypedDict
import pandas as pd
import datasets
import string
from evaluate import evaluator
from evaluate import load

from torch.utils.data import Dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Default run configuration; a later parameters cell may reassign both names.
model_size = 'int8'  # forwarded to LlamaModel as `model_resolution`
max_samples = 10  # cap on rows sent through inference; values <= 0 disable the cap

In [3]:
# Parameters
# NOTE(review): this looks like the override cell injected by papermill at run time - confirm.
# These assignments replace the defaults set in the preceding cell.
model_size = "int4"
max_samples = -1


## 1 - Load model

In [4]:
#papermill_description=LOADING_MODEL
model_name = "meta-llama/Llama-2-7b-chat-hf"

# Build the wrapper at the resolution selected by `model_size`.
model = LlamaModel(
    model_name=model_name,
    model_resolution=model_size
)


def _skip_device_copy(*args, _net=model.model, **kwargs):
    """No-op replacement for `.to()`: ignore the requested device/dtype and return the network.

    Accepting arbitrary arguments and returning the network (rather than echoing
    the first argument back) keeps `net = net.to(device)` style callers working.
    NOTE(review): presumably `model.model` is a torch module whose `.to()` must not
    run for the quantized checkpoint - confirm against `LlamaModel`.
    """
    return _net


model.model.to = _skip_device_copy # Disable device copying

Loading checkpoint shards:   0%|                                                        | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:  50%|████████████████████████                        | 1/2 [00:01<00:01,  1.08s/it]

Loading checkpoint shards: 100%|████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.46it/s]

Loading checkpoint shards: 100%|████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.35it/s]




## 2 - Load dataset

In [5]:
#papermill_description=LOADING_DATA
# Only the validation split is loaded; it is the split used for inference and scoring below.
dataset = datasets.load_dataset(
    path='squad_v2',
    split='validation',
)

In [6]:
# Display the dataset summary (feature names and row count).
dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 11873
})

In [7]:
# Inspect a single validation record to see the fields available for prompting.
dataset[5]

{'id': '5ad39d53604f3c001a3fe8d1',
 'title': 'Normans',
 'context': 'The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.',
 'question': "Who gave their name to Normandy in the 1000's and 1100's",
 'answers': {'text': [], 'answer_start': []}}

## 3 - Define data prep and model inference functions

In [8]:
def format_question(sample: dict, llm=None) -> str:
    """Format a sample from the squad V2 dataset to question answer string.

    Args:
        sample: Mapping (dict or DataFrame row) providing "title", "context"
            and "question".
        llm: Optional object exposing the ``B_INST`` / ``E_INST`` instruction
            tags. When omitted, the notebook-level ``model`` is used, so
            existing single-argument calls keep working.

    Returns:
        The instruction block followed by the article title, paragraph,
        question and a trailing "Answer: " cue.
    """
    tags = model if llm is None else llm
    pretext = (f'{tags.B_INST} You are performing extractive reading comprehension; '
               'given a question and a paragraph from an article, respond only with a '
               'direct extract from the article that answers the question and do not use your own '
               'prior knowledge.  If no direct extract from the context can answer the question, '
               f'respond with an empty quote.'
               f' {tags.E_INST}\n')  # Llama system directive
    q_a = (f'Article: {sample["title"]}\n'
         f'Paragraph: {sample["context"]}\n\n'
         f'Question: {sample["question"]}\n\n'
         f'Answer: ')

    return pretext + q_a


def _extract_answer(prompt: str, generated: str) -> str:
    """Return the cleaned completion that follows the echoed prompt in `generated`."""
    bos_tag, eos_tag = '<s>', '</s>'
    # The generation echoes the prompt behind a leading "<s>" tag; skip both.
    completion = generated[len(prompt) + len(bos_tag):]
    # Drop the closing tag only when it is really there, so a generation that
    # stopped without emitting "</s>" does not lose its last four characters.
    if completion.endswith(eos_tag):
        completion = completion[:-len(eos_tag)]
    return completion.strip().strip(string.punctuation)


def squad_inference(df: pd.DataFrame, model) -> pd.DataFrame:
    """Predict the output extracts for all samples in the input squad format dataset.

    Args:
        df: Frame with "title", "context" and "question" columns. It is not
            modified, and its index may be arbitrary (e.g. a filtered subset).
        model: Object exposing ``generate(prompt) -> str`` plus the
            ``B_INST`` / ``E_INST`` tags used to build the prompt.

    Returns:
        A deep copy of ``df`` with three added columns: ``prediction_text``
        (cleaned model answer), ``no_answer_probability`` (always 0.0 for now)
        and ``pred_start`` (offset of the answer in the context, or -1 when the
        answer is empty or not found).
    """
    df_val = df.copy(deep=True)
    contexts = df_val['context']

    texts, starts = [], []
    for pos in range(len(df_val)):
        prompt = format_question(df_val.iloc[pos], model)
        # TODO: get prob of </s> token on output as no_answer_probability
        answer = _extract_answer(prompt, model.generate(prompt))
        texts.append(answer)
        starts.append(str(contexts.iloc[pos]).find(answer) if answer != '' else -1)

    # Rows were read by position, so write the results back by position too
    # (aligned on the frame's own index) instead of by integer label.
    df_val['prediction_text'] = pd.Series(texts, index=df_val.index, dtype=object)
    df_val['no_answer_probability'] = 0.
    df_val['pred_start'] = pd.Series(starts, index=df_val.index, dtype='int64')

    return df_val

In [9]:
df = dataset.to_pandas()
df['prediction'] = ''
df['pred_start'] = -1

# Smoke-test the prompt -> generate -> clean-up path on one hand-picked sample.
idx = 9
x = format_question(dataset[idx])
print(x)

y_pred = model.generate(x)
print(y_pred)

# The generation echoes the prompt behind a leading "<s>" tag, so skip len(x) + 3 characters.
completion = y_pred[len(x)+3:]
# Remove the closing "</s>" tag only when present; slicing [:-4] unconditionally
# would eat four characters of the answer when generation stops without it.
if completion.endswith('</s>'):
    completion = completion[:-4]
y_pred_clean = completion.strip().strip(string.punctuation) # Remove whitespace + surrounding punctuation
df.loc[idx, 'prediction'] = y_pred_clean
if y_pred_clean != '':
    # Offset of the extract inside the paragraph; find() gives -1 when it is not a verbatim extract.
    df.loc[idx, 'pred_start'] = str(df.context.iloc[idx]).find(y_pred_clean)

[INST] You are performing extractive reading comprehension; given a question and a paragraph from an article, respond only with a direct extract from the article that answers the question and do not use your own prior knowledge.  If no direct extract from the context can answer the question, respond with an empty quote. [/INST]
Article: Normans
Paragraph: The Norman dynasty had a major political, cultural and military impact on medieval Europe and even the Near East. The Normans were famed for their martial spirit and eventually for their Christian piety, becoming exponents of the Catholic orthodoxy into which they assimilated. They adopted the Gallo-Romance language of the Frankish land they settled, their dialect becoming known as Norman, Normaund or Norman French, an important literary language. The Duchy of Normandy, which they formed by treaty with the French crown, was a great fief of medieval France, and under Richard I of Normandy was forged into a cohesive and formidable princ

<s> [INST] You are performing extractive reading comprehension; given a question and a paragraph from an article, respond only with a direct extract from the article that answers the question and do not use your own prior knowledge.  If no direct extract from the context can answer the question, respond with an empty quote. [/INST]
Article: Normans
Paragraph: The Norman dynasty had a major political, cultural and military impact on medieval Europe and even the Near East. The Normans were famed for their martial spirit and eventually for their Christian piety, becoming exponents of the Catholic orthodoxy into which they assimilated. They adopted the Gallo-Romance language of the Frankish land they settled, their dialect becoming known as Norman, Normaund or Norman French, an important literary language. The Duchy of Normandy, which they formed by treaty with the French crown, was a great fief of medieval France, and under Richard I of Normandy was forged into a cohesive and formidable p

In [10]:
# Show row `idx` of `df`, including the `prediction` / `pred_start` values written in the previous cell.
df.loc[idx]

id                                     56dddf4066d3e219004dad5f
title                                                   Normans
context       The Norman dynasty had a major political, cult...
question            Who was the duke in the battle of Hastings?
answers       {'text': ['William the Conqueror', 'William th...
prediction                                William the Conqueror
pred_start                                                 1022
Name: 9, dtype: object

In [11]:
#papermill_description=RUNNING_INFERENCE
pd_dataset = dataset.to_pandas()
# Optionally cap the run at the first `max_samples` rows; values <= 0 mean "use every row".
if 0 < max_samples < len(pd_dataset):
    # Slice end is exclusive, so `[:max_samples]` keeps exactly `max_samples` rows
    # (the previous `[:max_samples - 1]` kept one row too few).
    pd_dataset = pd_dataset.iloc[:max_samples]
df2 = squad_inference(pd_dataset, model)

In [12]:
# Preview the first row of the inference output.
df2.head(1)

Unnamed: 0,id,title,context,question,answers,prediction_text,no_answer_probability,pred_start
0,56ddde6b9a695914005b9628,Normans,The Normans (Norman: Nourmands; French: Norman...,In what country is Normandy located?,"{'text': ['France', 'France', 'France', 'Franc...",France,0.0,159


## 4 - Evaluate performance

In [13]:
#papermill_description=EVALUATION
squad_v2_metric = load("squad_v2")

# Both record lists carry the sample `id` alongside their payload columns.
prediction_columns = ['prediction_text', 'no_answer_probability', 'id']
reference_columns = ['answers', 'id']
predictions = df2[prediction_columns].to_dict(orient='records')
answers = df2[reference_columns].to_dict(orient='records')

# Score the predictions against the gold answers; the result is displayed as the cell output.
results = squad_v2_metric.compute(
    predictions=predictions,
    references=answers,
)
results

{'exact': 11.142929335467025,
 'f1': 22.407302595706955,
 'total': 11873,
 'HasAns_exact': 22.317813765182187,
 'HasAns_f1': 44.878863650274745,
 'HasAns_total': 5928,
 'NoAns_exact': 0.0,
 'NoAns_f1': 0.0,
 'NoAns_total': 5945,
 'best_exact': 50.10528088941295,
 'best_exact_thresh': 0.0,
 'best_f1': 50.11276753043787,
 'best_f1_thresh': 0.0}