In [1]:
# Requires accelerate==0.27.2, py7zr==0.21.0 , evaluate==0.4.0, rouge_score==0.1.2

import datasets
import torch

from llmsearch.tuner import Tuner
from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteriaList
from llmsearch.scripts.stopping_criteria import MultiTokenStoppingCriteria

  from .autonotebook import tqdm as notebook_tqdm


Monkey Patching .generate function of `transformers` library


In [2]:
model_id = "cognitivecomputations/dolphin-2.9-llama3-8b"
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side = "left", max_length = 1024)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype = torch.float16, device_map = "auto")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:21<00:00,  5.35s/it]


In [3]:
dataset = datasets.load_dataset("samsum")['train']

In [4]:
seed = 42
batch_size = 1
sample_dataset = dataset.shuffle(seed = seed).select(range(10))

In [5]:
multi_token_stop_criteria_ob = MultiTokenStoppingCriteria(sequence_ids=[128256])
stopping_criteria = StoppingCriteriaList([multi_token_stop_criteria_ob])
callbacks_after_inference = [multi_token_stop_criteria_ob.reset]

In [6]:
import evaluate
import numpy as np
rouge = evaluate.load('rouge')

In [7]:
get_rouge_score  = lambda y_pred, y_true : np.mean(rouge.compute(predictions=y_pred, references=y_true['summary'], use_stemmer=True, use_aggregator=False)['rouge2'])

In [14]:
get_rouge_score(["123"], ["451"])

TypeError: list indices must be integers or slices, not str

In [12]:
batch_size = 2
tuner_ob = Tuner(
    model=model,
    tokenizer=tokenizer,
    dataset=sample_dataset,
    device="cuda:0",
    batch_size=batch_size,
    tokenizer_encode_args={"padding": "longest",'truncation' : True, "add_special_tokens": False, 'max_length' : 1024},
    tokenizer_decode_args={"spaces_between_special_tokens": False, 'skip_special_tokens' : True},
    scorer=get_rouge_score,
    prompt_template="Summarize : {dialogue}",
    seed=seed,
    column_mapping={"input_cols": ["dialogue"], "eval_cols": ["summary"]},
    callbacks_after_inference=callbacks_after_inference,
)

In [13]:
gen_params1 = {
    'max_new_tokens' : 70,
    'stopping_criteria' : stopping_criteria,
    'generation_seed' : 42,
}

scores_before, outputs_before = tuner_ob.get_score(gen_params1)

  0%|          | 0/5 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


 20%|██        | 1/5 [00:14<00:56, 14.18s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 40%|████      | 2/5 [01:08<01:52, 37.57s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 60%|██████    | 3/5 [01:28<00:59, 29.71s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 80%|████████  | 4/5 [02:21<00:38, 38.75s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
100%|██████████| 5/5 [03:16<00:00, 39.22s/it]


TypeError: list indices must be integers or slices, not str