In [1]:
"""
Model - https://huggingface.co/cognitivecomputations/dolphin-2.2.1-mistral-7b fine-tuned using LoRA on the samsum validation set and then quantized to AWQ
dataset finetuned on - samsum validation set

llmsearch example shown on - samsum train set and evaluated on samsum test set

Requires:
nltk==3.8.1
rouge_score==0.1.2
py7zr==0.21.0
exllamav2@https://github.com/turboderp/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14+cu121-cp310-cp310-linux_x86_64.whl

pip install nltk==3.8.1 rouge_score==0.1.2 py7zr==0.21.0 autoawq
"""

import torch
import transformers

import llmsearch
import awq
from awq import AutoAWQForCausalLM

# Record the library versions this notebook was executed with.
print(
    awq.__version__,
    torch.__version__,
    transformers.__version__,
    llmsearch.__version__,
)

  from .autonotebook import tqdm as notebook_tqdm


Monkey Patching .generate function of `transformers` library
0.2.4 2.2.0+cu121 4.38.2 0.1.0


In [2]:
import os
from pathlib import Path
from typing import Dict, Any, Optional, Union, List

import nltk
import datasets
import evaluate
import numpy as np

from llmsearch.tuner import Tuner
from llmsearch.utils.mem_utils import gc_cuda
from sklearn.model_selection import GridSearchCV
from llmsearch.utils.common_utils import json_load, json_dump
from llmsearch.utils.model_downloader import download_model_from_hf
from llmsearch.scripts.stopping_criteria import MultiTokenStoppingCriteria
from llmsearch.utils.logging_utils import set_verbosity_info, set_verbosity_debug, set_verbosity_warning
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers import PreTrainedModel, PretrainedConfig, GenerationConfig, AutoTokenizer, StoppingCriteriaList

In [3]:
# Fetch the Punkt tokenizer data; presumably required by nltk.sent_tokenize,
# which postprocess_text() calls when preparing text for ROUGE.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
# Seed reused for dataset shuffling in load_dataset() and passed to the Tuner.
seed = 42
# Batch size handed to the Tuner.
batch_size = 1
# Number of shuffled train / test rows that load_dataset() keeps.
num_tune_samples = 500
num_test_samples = 500

# Hugging Face repo id of the model that load_model_and_tokenizer() downloads.
model_id = "Praful932/dolphin-2.2.1-mistral-7b-samsum-ft-v1-awq"
# Device the quantized model is mapped to (read by load_model_and_tokenizer()).
device = "cuda:0"

In [5]:
def postprocess_text(preds, labels):
    """Normalise predictions and references before ROUGE scoring.

    Every text is stripped of surrounding whitespace and then re-joined with
    one sentence per line, because rougeLSum expects a newline after each
    sentence.

    Args:
        preds: iterable of predicted summary strings.
        labels: iterable of reference summary strings.

    Returns:
        A ``(preds, labels)`` tuple of two lists of newline-separated strings.
    """
    def _one_sentence_per_line(text):
        # Split into sentences and place each sentence on its own line.
        return "\n".join(nltk.sent_tokenize(text.strip()))

    return (
        [_one_sentence_per_line(text) for text in preds],
        [_one_sentence_per_line(text) for text in labels],
    )

def get_rouge_score(y_true: List, y_pred: List):
    """Return the mean ROUGE-2 score of ``y_pred`` against the reference summaries.

    Args:
        y_true: list of items, each indexable by ``'summary'`` to obtain the
            reference text.
        y_pred: list of generated summary strings, aligned with ``y_true``.

    Returns:
        ``np.mean`` of the ``'rouge2'`` entry produced by the module-level
        ``rouge_metric`` (which must be defined before this is called).
    """
    references = [sample['summary'] for sample in y_true]
    cleaned_preds, cleaned_refs = postprocess_text(preds=y_pred, labels=references)

    # use_aggregator=False presumably yields one score per sample, which is
    # then averaged below.
    rouge_scores = rouge_metric.compute(
        predictions=cleaned_preds,
        references=cleaned_refs,
        use_stemmer=True,
        use_aggregator=False,
    )
    return np.mean(rouge_scores['rouge2'])

class DatasetWrapper:
    """Add chat-style message lists, and optionally rendered prompts, to a dataset.

    On construction every row gains a ``chat_format`` column: an optional
    system message, a user message built from ``prompt_template``, and, when
    ``add_output`` is true, an assistant message holding ``row[output_key]``.

    Args:
        hf_dataset: dataset object exposing ``.map(fn)``.
        tokenizer: object exposing ``apply_chat_template``; stored for
            ``apply_chat_template()``.
        prompt_template: ``str.format`` template for the user message.
        input_key: used both as the row column to read and as the keyword
            passed to ``prompt_template.format``, so it must match the
            template's placeholder name.
        output_key: row column holding the target text (read only when
            ``add_output`` is true).
        system_prompt: if non-empty, prepended as a system message.
        add_output: whether to append the assistant message.
    """

    def __init__(self, hf_dataset, tokenizer, prompt_template = "Summarize : {dialogue}", input_key = "", output_key = "", system_prompt = "", add_output = True):
        self.tokenizer = tokenizer

        def _build_messages(row):
            # Assemble system -> user -> assistant messages in that order.
            messages = []
            if system_prompt:
                messages.append({'role' : "system", "content" : system_prompt})
            user_text = prompt_template.format(**{input_key : row[input_key]})
            messages.append({'role' : "user", "content" : user_text})
            if add_output:
                messages.append({'role' : 'assistant', "content" : row[output_key]})
            return {"chat_format" : messages}

        self.hf_dataset = hf_dataset.map(_build_messages)

    def apply_chat_template(self, add_gen_prompt = True):
        """Render ``chat_format`` into a ``formatted_chat`` string column via the tokenizer."""
        def _render(row):
            rendered = self.tokenizer.apply_chat_template(
                row["chat_format"],
                tokenize=False,
                add_generation_prompt=add_gen_prompt,
            )
            return {"formatted_chat": rendered}

        self.hf_dataset = self.hf_dataset.map(_render)



def load_model_and_tokenizer(model_id, temp_model_dir):
    """Download the model files and load the AWQ model together with its tokenizer.

    Args:
        model_id: Hugging Face repo id passed to ``download_model_from_hf``.
        temp_model_dir: ``pathlib.Path`` directory to download into; created
            if it does not exist.

    Returns:
        ``(model, tokenizer)``. The tokenizer's pad token is set to its unk token.

    Note:
        The model is placed on the module-level ``device``.
    """
    temp_model_dir.mkdir(parents=True, exist_ok=True)
    local_model_path = download_model_from_hf(model_id, save_dir=temp_model_dir, branch="main")

    # Presumably frees cached GPU memory before the weights are loaded.
    gc_cuda()

    awq_model = AutoAWQForCausalLM.from_quantized(
        quant_path=local_model_path,
        fuse_layers=True,
        device_map={"": device},
        local_files_only=True,
    )
    awq_tokenizer = AutoTokenizer.from_pretrained(local_model_path, local_files_only=True)
    # Reuse the unk token for padding.
    awq_tokenizer.pad_token = awq_tokenizer.unk_token

    return awq_model, awq_tokenizer

def _prepare_split(split, num_samples):
    """Shuffle a raw split, keep its first ``num_samples`` rows and render their prompts."""
    # Sample first so the chat formatting below only runs on the rows we keep;
    # the shuffle order depends on the seed and row count, not on the columns.
    sampled = split.shuffle(seed=seed).select(range(num_samples))
    wrapped = DatasetWrapper(sampled, tokenizer, input_key = "dialogue", output_key = "summary", add_output = False)
    wrapped.apply_chat_template(add_gen_prompt=True)
    return wrapped.hf_dataset


def load_dataset():
    """Build the tuning and test samples from the SAMSum dataset.

    The model was fine-tuned on the validation split, so tuning samples come
    from ``train`` and evaluation samples from ``test``.

    Reads the module-level ``tokenizer``, ``seed``, ``num_tune_samples`` and
    ``num_test_samples``.

    Returns:
        ``(samples_to_tune_on, test_samples)``: shuffled subsets of the train
        and test splits, each with ``chat_format`` and ``formatted_chat``
        columns added (no assistant message, generation prompt appended).
    """
    # Load once and reuse for both splits.
    samsum = datasets.load_dataset("samsum")

    samples_to_tune_on = _prepare_split(samsum['train'], num_tune_samples)
    test_samples = _prepare_split(samsum['test'], num_test_samples)

    return samples_to_tune_on, test_samples

In [6]:
# Load Model, Tokenizer, Dataset
temp_model_dir = Path("./temp_dir/")
temp_model_dir.mkdir(exist_ok=True, parents=True)

model, tokenizer = load_model_and_tokenizer(model_id, temp_model_dir)

# Dataset we will use to find the best generation parameters and test samples
samples_to_tune_on,test_dataset = load_dataset()

# Token id that ends generation. NOTE(review): presumably the id of the
# chat-template end marker (<|im_end|>) for this model - confirm against the tokenizer.
STOP_TOKEN_ID = 32000

# create stop token criteria
multi_token_stop_criteria_ob = MultiTokenStoppingCriteria(sequence_ids=[STOP_TOKEN_ID])
stopping_criteria = StoppingCriteriaList([multi_token_stop_criteria_ob])
# The stopping criteria object is reset after each inference call.
callbacks_after_inference = [multi_token_stop_criteria_ob.reset]

# Metric object used by get_rouge_score()
rouge_metric = evaluate.load("rouge")

Model already exists in temp_dir/Praful932_dolphin-2.2.1-mistral-7b-samsum-ft-v1-awq. Checking the model files...
Checksum validated: model.safetensors  dbaac51863a65313f57cabca6832ea59fad78b4cc442dd6966479a747ced2a14
[+] Validated checksums of all model files!


Replacing layers...: 100%|██████████| 32/32 [00:04<00:00,  7.05it/s]
Fusing layers...: 100%|██████████| 32/32 [00:02<00:00, 13.17it/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
# Set llmsearch logging verbosity to the warning level (presumably hides info/debug messages).
set_verbosity_warning()

In [8]:
# Wrap model, tokenizer, data and scorer into a Tuner that exposes a
# scikit-learn style estimator and scorer for the hyperparameter search.
tuner_ob = Tuner(
    model=model,
    tokenizer=tokenizer,
    dataset=samples_to_tune_on,
    # Use the configured device so the tuner always matches where the model was loaded.
    device=device,
    batch_size=batch_size,
    tokenizer_encode_args={"padding": "longest",'truncation' : True, "add_special_tokens": False},
    tokenizer_decode_args={"spaces_between_special_tokens": False, 'skip_special_tokens' : True},
    scorer=get_rouge_score,
    # The prompt is the already-rendered chat string, used as-is.
    prompt_template="{formatted_chat}",
    seed=seed,
    column_mapping={"input_cols": ["formatted_chat"], "eval_cols": ["summary"]},
    callbacks_after_inference=callbacks_after_inference,
)

In [9]:
# Sanity check: show the first input of the tuner's dataset (the formatted prompt).
print(tuner_ob.dataset['_X'][0])

<|im_start|>user
Summarize : Lucy: omg did you see JK this morning?
Sue: I try to avoid it lol
Lucy: you should have seen it it was disgusting
Sue: I cant do it anymore i try to listen to the radio in the mornings.. jk makes you think the whole world is full of idiots lol
Lucy: you may be right I dont know how some of them can go on there in public for the world to see
Sue: I would die if I got a call to go on there lol
Sue: could you imagine ha ha 
Lucy: I would piss myself If I saw you and Andy up there
Sue: over my dead body !<|im_end|>
<|im_start|>assistant



In [10]:
# Baseline generation settings, scored before any hyperparameter search.
gen_params1 = dict(
    max_new_tokens=70,
    stopping_criteria=stopping_criteria,
    generation_seed=42,
)

# Score of the baseline settings on the tuning samples, plus the generated outputs.
scores_before, outputs_before = tuner_ob.get_score(gen_params1)

  0%|          | 0/500 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/500 [00:03<32:26,  3.90s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 2/500 [00:05<20:25,  2.46s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/500 [00:06<16:39,  2.01s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 4/500 [00:09<17:52,  2.16s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 5/500 [00:10<15:15,  1.85s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 6/500 [00:12<15:27,  1.88s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|▏         | 7/500 [00:13<13:01,  1.59s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 8/500 [00:14<11:38,  1.42s/it]Setting `pad_token_id` to `eos_token_id`:32000 f

In [11]:
# Baseline score returned by tuner_ob.get_score(gen_params1).
print(scores_before)

0.25437221787091235


In [12]:
# Search space: single-value lists are held fixed; only temperature and
# top_k vary (6 x 4 = 24 candidate combinations).
hyp_space = dict(
    max_new_tokens=[70],
    stopping_criteria=[stopping_criteria],
    generation_seed=[42],
    do_sample=[True],
    temperature=[0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
    top_k=[50, 60, 70, 80],
    no_repeat_ngram_size=[0],
)

# Exhaustive grid search over hyp_space with 2 folds per candidate,
# scored with the tuner's scorer.
clf = GridSearchCV(
    estimator=tuner_ob.estimator,
    param_grid=hyp_space,
    scoring=tuner_ob.scorer,
    cv=2,
    n_jobs=None,
    verbose=3,
)

In [13]:
# Run the grid search on the tuner's inputs ('_X') and references ('_y').
clf.fit(X=tuner_ob.dataset["_X"], y=tuner_ob.dataset['_y'])

Fitting 2 folds for each of 24 candidates, totalling 48 fits


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<02:45,  1.50it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:26,  1.20it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:26,  1.20it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:43,  1.15s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:29,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.


  2%|▏         | 6/250 [00:06<04:52,  1.20s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:13,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:56,  1.02it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  4%|▎         | 9/250 [00:08<03:40,  1.10it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  4%|▍         | 10/250 [00:09<03:31,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  4%|▍         | 11/250 [00:11<04:48,  1.21s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  5%|▍         | 12/250 [00:12<04:20,  1.09s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  5%|▌         | 13/250 [00:12<03:38,  1.09it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  6%|▌         | 14/250 [00:13<03:45,  1.05i

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.1, top_k=50;, score=0.257 total time= 3.8min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<07:09,  1.72s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:02<04:45,  1.15s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<05:15,  1.28s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:34,  1.12s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:30,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:14,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:03,  1.00s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:36,  1.12it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.1, top_k=50;, score=0.242 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<02:46,  1.49it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:28,  1.19it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:29,  1.18it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:46,  1.17s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:31,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:56,  1.21s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:13,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:59,  1.01it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.1, top_k=60;, score=0.257 total time= 3.8min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<07:13,  1.74s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:02<04:46,  1.16s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<05:12,  1.27s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:32,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:28,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:13,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:03,  1.00s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:37,  1.12it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.1, top_k=60;, score=0.242 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<02:44,  1.51it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:26,  1.20it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:27,  1.19it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:44,  1.16s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:30,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:54,  1.21s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:12,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:56,  1.02it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.1, top_k=70;, score=0.257 total time= 3.8min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<07:20,  1.77s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:02<04:53,  1.18s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<05:21,  1.30s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:38,  1.13s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:33,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:16,  1.05s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:04,  1.01s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:41,  1.09it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.1, top_k=70;, score=0.242 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<02:44,  1.52it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:25,  1.20it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:27,  1.19it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:44,  1.16s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:30,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:55,  1.21s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:15,  1.05s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:58,  1.02it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.1, top_k=80;, score=0.257 total time= 3.8min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<07:12,  1.74s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:02<04:45,  1.15s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<05:15,  1.28s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:35,  1.12s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:31,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:16,  1.05s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:06,  1.02s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:39,  1.10it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.1, top_k=80;, score=0.242 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:02,  1.37it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:33,  1.16it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:59,  1.03it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:41,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<04:03,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:38,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:03,  1.00s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:58,  1.01it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.3, top_k=50;, score=0.242 total time= 3.9min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<02:59,  1.39it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:02,  1.36it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:33,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<04:19,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:32,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:41,  1.15s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:15,  1.05s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:40,  1.10it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.3, top_k=50;, score=0.238 total time= 3.9min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:01,  1.37it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:32,  1.17it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:56,  1.04it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:39,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<04:02,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:34,  1.12s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:58,  1.02it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:53,  1.04it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.3, top_k=60;, score=0.242 total time= 3.9min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<02:59,  1.39it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:03,  1.35it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:33,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<04:19,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:32,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:42,  1.16s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:17,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:41,  1.09it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.3, top_k=60;, score=0.238 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:02,  1.36it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:34,  1.16it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:58,  1.04it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:41,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<04:03,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:35,  1.13s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<03:59,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:54,  1.03it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.3, top_k=70;, score=0.242 total time= 3.9min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:00,  1.38it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:15,  1.27it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:48,  1.17s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:35,  1.12s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:43,  1.16s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:48,  1.18s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:20,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:43,  1.08it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.3, top_k=70;, score=0.238 total time= 3.9min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:01,  1.37it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:33,  1.16it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:57,  1.04it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:40,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<04:03,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:35,  1.13s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:58,  1.02it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:54,  1.03it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.3, top_k=80;, score=0.242 total time= 3.9min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:02,  1.36it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:04,  1.34it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:37,  1.12s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:21,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:34,  1.12s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:43,  1.16s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:16,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:40,  1.10it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.3, top_k=80;, score=0.238 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:34,  1.16it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:49,  1.08it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:04,  1.34it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<04:22,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<03:53,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:57,  1.22s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:14,  1.05s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:58,  1.01it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.5, top_k=50;, score=0.227 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:06,  1.33it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:12,  1.29it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:43,  1.15s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:28,  1.09s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:30,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:13,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:49,  1.06it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:23,  1.19it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.5, top_k=50;, score=0.230 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:25,  1.21it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:42,  1.11it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:00,  1.37it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<04:23,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<03:54,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:55,  1.21s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:13,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:55,  1.03it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.5, top_k=60;, score=0.228 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:03,  1.36it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:05,  1.34it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:35,  1.12s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:21,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:24,  1.08s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:07,  1.02s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:44,  1.08it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:18,  1.22it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.5, top_k=60;, score=0.230 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:28,  1.20it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:44,  1.11it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:01,  1.36it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<04:19,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<03:51,  1.06it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:52,  1.20s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<04:10,  1.03s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:53,  1.04it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.5, top_k=70;, score=0.228 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:00,  1.38it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:01,  1.36it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:43,  1.15s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:28,  1.09s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:29,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:11,  1.03s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:46,  1.07it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:20,  1.21it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.5, top_k=70;, score=0.230 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:27,  1.20it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:43,  1.11it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:01,  1.36it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<04:20,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<03:51,  1.06it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:51,  1.20s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<04:10,  1.03s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:54,  1.03it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.5, top_k=80;, score=0.228 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:01,  1.38it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:01,  1.36it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:34,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:21,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:28,  1.09s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:11,  1.03s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:46,  1.07it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:20,  1.21it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.5, top_k=80;, score=0.230 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:56,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:56,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:23,  1.21it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:30,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<04:00,  1.02it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:47,  1.07it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:32,  1.14it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:44,  1.08it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.7, top_k=50;, score=0.211 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:56,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:18,  1.25it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:50,  1.17s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:16,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:53,  1.20s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:30,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:18,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:41,  1.09it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.7, top_k=50;, score=0.200 total time= 4.1min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<04:00,  1.04it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:57,  1.04it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:25,  1.20it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:32,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<04:02,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:48,  1.07it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:33,  1.14it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:45,  1.07it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.7, top_k=60;, score=0.199 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:55,  1.06it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:17,  1.25it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:49,  1.17s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:15,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:52,  1.20s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:29,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:17,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:39,  1.10it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.7, top_k=60;, score=0.200 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<04:12,  1.01s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<04:02,  1.02it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:26,  1.20it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:31,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<04:01,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:47,  1.07it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:32,  1.14it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:44,  1.08it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.7, top_k=70;, score=0.199 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<04:13,  1.02s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:34,  1.16it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<05:07,  1.24s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:30,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<05:06,  1.25s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:41,  1.15s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:26,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:46,  1.07it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.7, top_k=70;, score=0.200 total time= 4.1min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:56,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:55,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<03:23,  1.21it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:30,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<04:01,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:47,  1.07it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:32,  1.14it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:43,  1.08it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.7, top_k=80;, score=0.199 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:56,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:18,  1.25it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:52,  1.18s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:19,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:55,  1.21s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:30,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:19,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:41,  1.09it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.7, top_k=80;, score=0.200 total time= 4.0min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<04:13,  1.02s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<02:30,  1.65it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<04:20,  1.05s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<04:21,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<05:18,  1.30s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:07<05:32,  1.36s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:41,  1.16s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<04:07,  1.02s/it]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.9, top_k=50;, score=0.154 total time= 4.2min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:14,  1.28it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<02:58,  1.39it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<04:05,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<03:37,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<03:37,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:49,  1.06it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<04:01,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:29,  1.16it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.9, top_k=50;, score=0.165 total time= 4.2min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<04:13,  1.02s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<02:29,  1.65it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<04:23,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:25,  1.08s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<05:20,  1.31s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:07<05:36,  1.38s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:08<04:46,  1.18s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<04:10,  1.04s/it]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.9, top_k=60;, score=0.165 total time= 4.1min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:30,  1.18it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:16,  1.26it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<04:21,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<03:47,  1.08it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<03:44,  1.09it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:54,  1.04it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<04:06,  1.01s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:32,  1.14it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.9, top_k=60;, score=0.177 total time= 4.3min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<04:22,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<02:33,  1.61it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<04:24,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:23,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<05:19,  1.30s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:07<05:36,  1.38s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:08<04:44,  1.17s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<04:09,  1.03s/it]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.9, top_k=70;, score=0.153 total time= 4.2min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:13,  1.28it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<02:57,  1.40it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<04:04,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<03:36,  1.14it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<03:37,  1.12it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:50,  1.06it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<04:02,  1.00it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:29,  1.16it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.9, top_k=70;, score=0.175 total time= 4.2min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<04:14,  1.02s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<02:30,  1.65it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<04:21,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:22,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<05:19,  1.31s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:07<05:33,  1.37s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:08<04:45,  1.17s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<04:16,  1.06s/it]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.9, top_k=80;, score=0.157 total time= 4.3min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:12,  1.29it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<02:57,  1.40it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:02<04:04,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:03<03:36,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:04<03:37,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:49,  1.06it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<04:01,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:29,  1.16it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=0.9, top_k=80;, score=0.156 total time= 4.1min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<06:45,  1.63s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:02<05:21,  1.29s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:21,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:05<05:10,  1.26s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:29,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:07<04:56,  1.21s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:08<04:18,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:41,  1.09it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=1.0, top_k=50;, score=0.141 total time= 4.1min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:59,  1.04it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:14,  1.27it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:32,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:15,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:21,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:57,  1.03it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:35,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:17,  1.22it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=1.0, top_k=50;, score=0.153 total time= 4.3min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<06:28,  1.56s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:02<05:12,  1.26s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:16,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<05:07,  1.25s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:28,  1.09s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:07<04:56,  1.21s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<04:18,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:42,  1.09it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=1.0, top_k=60;, score=0.152 total time= 4.3min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:56,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:12,  1.29it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:30,  1.10s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:16,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:22,  1.07s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:58,  1.02it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:34,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:12,  1.26it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=1.0, top_k=60;, score=0.155 total time= 4.2min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:01<06:34,  1.59s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:02<05:16,  1.28s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:20,  1.05s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:05<05:15,  1.28s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:34,  1.12s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:07<05:01,  1.23s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:08<04:22,  1.08s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:08<03:45,  1.08it/s]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=1.0, top_k=70;, score=0.146 total time= 4.3min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<04:06,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:21,  1.23it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:42,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:26,  1.09s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:39,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:06<04:12,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:07<03:47,  1.07it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:22,  1.20it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=1.0, top_k=70;, score=0.147 total time= 4.3min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:48,  1.09it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:02<05:11,  1.26s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:03,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:03,  1.01it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<05:06,  1.25s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:07<05:44,  1.41s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:08<04:58,  1.23s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:09<04:41,  1.16s/it]Setting `

[CV 1/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=1.0, top_k=80;, score=0.143 total time= 4.3min


  0%|          | 0/250 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/250 [00:00<03:56,  1.05it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 2/250 [00:01<03:12,  1.29it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/250 [00:03<04:30,  1.09s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 4/250 [00:04<04:14,  1.03s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 5/250 [00:05<04:19,  1.06s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 6/250 [00:05<03:56,  1.03it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 7/250 [00:06<03:34,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  3%|▎         | 8/250 [00:07<03:12,  1.26it/s]Setting `

[CV 2/2] END do_sample=True, generation_seed=42, max_new_tokens=70, no_repeat_ngram_size=0, stopping_criteria=[<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], temperature=1.0, top_k=80;, score=0.153 total time= 4.1min


In [14]:
# Score the generation parameters selected by the search (`clf.best_params_`).
# No dataset is passed here, unlike the later test-set calls - presumably
# `get_score` then uses the dataset the tuner was created with; TODO confirm.
scores_after, outputs_after = tuner_ob.get_score(clf.best_params_)

  0%|          | 0/500 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/500 [00:00<05:30,  1.51it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 2/500 [00:01<06:55,  1.20it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/500 [00:02<07:01,  1.18it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 4/500 [00:04<09:38,  1.17s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 5/500 [00:05<09:09,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 6/500 [00:06<10:00,  1.22s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|▏         | 7/500 [00:07<08:33,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 8/500 [00:08<08:02,  1.02it/s]Setting `

 11%|█         | 53/500 [00:51<07:37,  1.02s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
 11%|█         | 54/500 [00:51<06:36,  1.13it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
 11%|█         | 55/500 [00:52<05:40,  1.31it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
 11%|█         | 56/500 [00:52<05:46,  1.28it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
 11%|█▏        | 57/500 [00:53<05:54,  1.25it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
 12%|█▏        | 58/500 [00:54<05:49,  1.26it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
 12%|█▏        | 59/500 [00:55<06:45,  1.09it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
 12%|█▏        | 60/500 [00:57<07:57,  1.09s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
 12%|█▏        | 61/500 [00:58<07:48,  1

In [15]:
# Show the score from before the search next to the score for the best found parameters.
print(scores_before, scores_after)

0.25437221787091235 0.25640652169458006


In [16]:
# Display the best parameters as a string. The stopping-criteria entry is an
# object, so it shows up as a default `<... object at 0x...>` repr.
str(clf.best_params_)

"{'do_sample': True, 'generation_seed': 42, 'max_new_tokens': 70, 'no_repeat_ngram_size': 0, 'stopping_criteria': [<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], 'temperature': 0.1, 'top_k': 50}"

In [17]:

# Collect the before/after scores and generated outputs so they can be written to disk.
d = {
    'scores_before' : scores_before,
    'scores_after' : scores_after,
    'outputs_before' : outputs_before,
    'outputs_after' : outputs_after,
    # Stored as a string - presumably because the params hold a stopping-criteria
    # object that cannot be written as JSON directly; TODO confirm.
    'best_params' : str(clf.best_params_),
}

# The results file is labelled with the model tuned in this notebook
# (dolphin-2.2.1-mistral-7b, see `model_id`); the previous "capybara-7b"
# label referred to a different model.
f = "./samsum-best-params-500s-dolphin-2.2.1-mistral-7b.json"
json_dump(d, f)

In [18]:
# Print the stringified best parameters held in the results dict.
print(d['best_params'])

{'do_sample': True, 'generation_seed': 42, 'max_new_tokens': 70, 'no_repeat_ngram_size': 0, 'stopping_criteria': [<llmsearch.scripts.stopping_criteria.MultiTokenStoppingCriteria object at 0x7f3b38303610>], 'temperature': 0.1, 'top_k': 50}


In [19]:
# hardcoding from above file here due to notebook re-run
# NOTE(review): nothing is hardcoded in this cell as written - it only displays
# the number of examples in `test_dataset`; confirm whether the note above is stale.
len(test_dataset)

500

In [20]:
# Evaluate on the test samples with a fixed set of generation settings
# (token budget, stopping criteria and seed only) to get the "before" score.
gen_params1 = dict(
    max_new_tokens=70,
    stopping_criteria=stopping_criteria,
    generation_seed=42,
)

oos_scores_before, oos_outputs_before = tuner_ob.get_score(
    gen_params1,
    test_dataset,
)

Map: 100%|██████████| 500/500 [00:00<00:00, 8154.60 examples/s]
  0%|          | 0/500 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/500 [00:00<07:49,  1.06it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 2/500 [00:01<07:32,  1.10it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/500 [00:03<08:36,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 4/500 [00:04<08:37,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 5/500 [00:05<09:38,  1.17s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 6/500 [00:06<09:09,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|▏         | 7/500 [00:07<09:05,  1.11s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end gene

In [21]:
# Display the test-set score obtained with the fixed `gen_params1` settings.
oos_scores_before

0.2554371469507787

In [22]:
# Inspect a single test example to check its fields and the formatted prompt.
test_dataset[1]

{'id': '13681165-1',
 'dialogue': "Alyssa: Have you seen Fergie’s national anthem? Illuminati does a great job.\r\nDerek: This is not normal. I saw it last week…\r\nAlyssa: What do you think about it?\r\nDerek: I can fart bright stripes and bright stars better then she sings.\r\nAlyssa: The best part is that she acts like she nailed it. But at least it's funny in a good way.\r\nDerek: It is 😂",
 'summary': "Derek and Alyssa make fun of Fergie's performance of the national anthem.",
 'chat_format': [{'content': "Summarize : Alyssa: Have you seen Fergie’s national anthem? Illuminati does a great job.\r\nDerek: This is not normal. I saw it last week…\r\nAlyssa: What do you think about it?\r\nDerek: I can fart bright stripes and bright stars better then she sings.\r\nAlyssa: The best part is that she acts like she nailed it. But at least it's funny in a good way.\r\nDerek: It is 😂",
   'role': 'user'}],
 'formatted_chat': "<|im_start|>user\nSummarize : Alyssa: Have you seen Fergie’s nation

In [23]:
# Score the best parameters found by the search on the test dataset.
oos_scores_after, oos_outputs_after = tuner_ob.get_score(clf.best_params_,test_dataset)

Map: 100%|██████████| 500/500 [00:00<00:00, 9223.24 examples/s]
  0%|          | 0/500 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 1/500 [00:00<07:29,  1.11it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  0%|          | 2/500 [00:01<07:25,  1.12it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 3/500 [00:02<07:12,  1.15it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 4/500 [00:03<07:26,  1.11it/s]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 5/500 [00:05<10:03,  1.22s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|          | 6/500 [00:06<09:21,  1.14s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  1%|▏         | 7/500 [00:07<09:27,  1.15s/it]Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.
  2%|▏         | 8/500 [00:08<10:04,  1.23s/it]Setting `pad_token_id` to `eos_token_id`:32000 f

In [24]:
# Show the test-set scores for the fixed settings and for the best found parameters side by side.
print(oos_scores_before, oos_scores_after)

0.2554371469507787 0.2590378656058853


In [26]:
# Collect every result produced in this notebook: scores and outputs from the
# earlier `get_score` runs plus the ones computed on `test_dataset` (the `oos_*` entries).
d = {
    'scores_before' : scores_before,
    'scores_after' : scores_after,
    'outputs_before' : outputs_before,
    'outputs_after' : outputs_after,

    'oos_scores_before' : oos_scores_before,
    'oos_scores_after' : oos_scores_after,
    'oos_outputs_before' : oos_outputs_before,
    'oos_outputs_after' : oos_outputs_after,
    # Stored as a string - presumably because the params hold a stopping-criteria
    # object that cannot be written as JSON directly; TODO confirm.
    'best_params' : str(clf.best_params_),
}

# The results file is labelled with the model tuned in this notebook
# (dolphin-2.2.1-mistral-7b, see `model_id`); the previous "capybara-7b"
# label referred to a different model.
f = "./samsum-best-params-500s-tune-dolphin-2.2.1-mistral-7b.json"
json_dump(d, f)