In [6]:
import os
os.environ["HF_HOME"] = "E:/huggingface"

import transformers
from datasets import load_dataset


dataset = load_dataset("squad_it")
DEVICE = "cuda:0"

In [7]:
from get_model import get_model
model_name = "llamantino"

model, tokenizer = get_model(model_name)
#model = model.to(DEVICE)

model-00001-of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

bin c:\Users\Samuele\AppData\Local\Programs\Python\Python311\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/170 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [8]:
from get_prompt import get_prompt

generate_prompt, stop = get_prompt(model_name)

In [9]:
def build_question(context, question):
    return f"Dato il seguente testo:\n{context}\nRispondi brevemente a questa domanda:\n{question}"

def build_answer(answer):
    return f"Risposta breve: {answer}"

In [10]:
import random

def get_shots(dataset, n):
    conversation = []
    for i in range(n):
        elem = random.choice(dataset["train"])
        q_shot = build_question(elem["context"], elem["question"])
        conversation.append(dict(
            role="user",
            text=q_shot
        ))
        conversation.append(dict(
            role="ai",
            text=build_answer(elem["answers"]["text"][0])
        ))
    return conversation

In [11]:
import torch

def forward_model(*, prompt, model, tokenizer):
    if "pad_token" not in tokenizer.special_tokens_map:
        tokenizer.pad_token = tokenizer.eos_token

    input_ids = tokenizer(
        prompt,
        return_tensors='pt',
        padding=True,
    ).input_ids
    with torch.no_grad():
        input_ids = input_ids.to(DEVICE)
        output_ids = model.generate(
            input_ids=input_ids,
            max_new_tokens=32,
            pad_token_id=tokenizer.eos_token_id,
            prompt_lookup_num_tokens=10
        )

    ret = []
    for i in range(0, len(output_ids)):
        generated_text = tokenizer.decode(
            output_ids[i],
            skip_special_tokens=True
        )
        generated_text = generated_text[len(prompt[i]):]

        if "\n" in generated_text:
            generated_text = generated_text[:generated_text.index("\n")]
        
        ret.append(generated_text.strip())

    return ret

In [12]:
import time

t0 = time.time()
out = forward_model(
    prompt=["La vita è"],
    model=model,
    tokenizer=tokenizer,
)
print(time.time() - t0)

print(out)



18.048433303833008
['piena di sorprese e di inaspettate opportunità. Non abbiate paura di correre rischi e provare c']


In [13]:
import uuid
from tqdm import tqdm

batch_size = 1

predicted_answers = []
theoretical_answers = []

ds = dataset["test"].shuffle(seed=42)

bar = tqdm(ds.iter(batch_size=batch_size), total=len(ds))
total = 0
for elem in bar:
    try:
        ids = elem["id"]
        context = elem["context"]
        question = elem["question"]
        answers = elem["answers"]

        model_inputs = []
        for c, q in zip(context, question):
            model_inputs.append(
                generate_prompt(
                    get_shots(dataset, 3) + [
                        dict(
                            role="user",
                            text=build_question(c, q)
                        )
                    ],
                    do_continue=True
                ) + " " + build_answer("").strip()
            )

        model_outputs = forward_model(
            prompt=model_inputs,
            model=model,
            tokenizer=tokenizer,
        )
        #print(f"DOMANDA:{model_inputs[0][520:]}\nRISPOSTA{model_outputs[0]}\nSOLUZIONE{answers[0]['text']}\n")

        for model_output, id in zip(model_outputs, ids):
            predicted_answers.append(dict(
                id=id,
                prediction_text=model_output,
            ))

        for ans, id in zip(answers, ids):
            theoretical_answers.append(dict(
                id=id,
                answers=ans,
            ))
        
    except Exception as e:
        print(e)
        continue






  0%|          | 0/7609 [00:00<?, ?it/s]

100%|██████████| 7609/7609 [7:23:58<00:00,  3.50s/it]   


In [14]:
import os
import json


model_name += '_13b'
# Create the directory if it doesn't exist
os.makedirs('./cache', exist_ok=True)
with open(f"./cache/generated-squad-{model_name}.json", "w") as f:
    json.dump(dict(
        predicted_answers=predicted_answers,
        theoretical_answers=theoretical_answers,
    ), f, indent=4)

In [15]:
import json
with open(f"./cache/generated-squad-{model_name}.json", "r") as f:
    data = json.load(f)

print(len(data["predicted_answers"]))

7609


In [16]:
import evaluate

predicted_answers = data["predicted_answers"]
theoretical_answers = data["theoretical_answers"]

metric = evaluate.load("squad")
results = metric.compute(predictions=predicted_answers, references=theoretical_answers)

import time


print("=== REPORT ===")
print("current date:", time.strftime("%d/%m/%Y %H:%M:%S"))
print("Dataset: SQuAD-it")
print("Model:", model_name)
print(results)
print("==========================")

Downloading builder script:   0%|          | 0.00/4.53k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.32k [00:00<?, ?B/s]

=== REPORT ===
current date: 19/01/2024 03:11:59
Dataset: SQuAD-it
Model: llamantino_13b
{'exact_match': 0.5388355894335655, 'f1': 11.158163481250739}
