### Load the dataset

In [1]:
from torch.utils.data import Dataset, DataLoader

class TextDataset(Dataset):
    def __init__(self, spa_path, wayuu_path):
        with open(spa_path, 'r', encoding='utf-8') as f:
            self.spa_lines = [line.strip() for line in f if line.strip()]

        with open(wayuu_path, 'r', encoding='utf-8') as f:
            self.wayuu_lines = [line.strip() for line in f if line.strip()]

    def __len__(self):
        return len(self.spa_lines)

    def __getitem__(self, idx):
        spa = self.spa_lines[idx]
        wayuu = self.wayuu_lines[idx]
        
        return spa, wayuu
    
spanish_val_file = 'datasets/dev.es.txt'
wayuu_val_file = 'datasets/dev.guc.txt'

# Load the dataset
dataset = TextDataset(spanish_val_file, wayuu_val_file)

### Eval function

In [2]:
import torch
import sacrebleu
from tqdm import tqdm
from vllm import SamplingParams

def get_rewards_translation(generations, correct_translations):

    bleu = sacrebleu.BLEU(effective_order = True)
    def get_bleu_score(sample, correct_translation):
        # Compute bleu score for each sample. 
        # Bleu score normalized to [0, 1]
        return bleu.sentence_score(sample, 
                                   [correct_translation]
                                   ).score

    answer_bleu_scores = [
        get_bleu_score(sample, translation)
        for sample, translation in zip(generations, correct_translations)
    ]
    
    return answer_bleu_scores

translate_prompt_template_tool="""Translate the following Spanish text into Wayuunaiki.
Begin by identifying any words or phrases you're unsure how to translate. Then, you may look up those words using the dictionary tool by wrapping the Spanish word in <spa_to_wayuu> and </spa_to_wayuu>,
and doind that for every unknown word. The dictionary will return matches enclosed in <matches> and </matches>. You can use the dictionary as many times as necessary.
Once you have all the information you need, provide the final translation enclosed in <answer> and </answer>. For example: <answer> xxx </answer>.

Spanish text: {}"""
def generate_batch_completion(model, tokenizer, prompts: list, actions_num=1, **kwargs):
    batch = [[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": translate_prompt_template_tool.format(prompt)}
    ] for prompt in prompts]
    texts = tokenizer.apply_chat_template(
        batch,
        tokenize=False,
        add_generation_prompt=True,
    )

    default_sampling_args = {
        'max_new_tokens': 512,
        'temperature': 0.8,
        'top_p': 0.95,
    }
    default_sampling_args.update(kwargs)

    model_inputs = tokenizer(texts, padding='longest', padding_side='left')

    inputs = model_inputs.input_ids
    dones = [False] * len(prompts)
    prompt_length = [len(input_ids) for input_ids in inputs]
    mask = [[1] * len(input_ids) for input_ids in inputs]
    responses = [""] * len(prompts)
    tools_enabled = kwargs.get('tools', [])
    stop_tokens = [tool['end_token'] for tool in tools_enabled]
    tool_used = [False] * len(prompts)
    how_many_tool_calls = [0] * len(prompts)
    for action_step in range(actions_num + 1 if len(tools_enabled) > 0 else 1):
        sampling_params = SamplingParams(temperature=default_sampling_args["temperature"], top_p=default_sampling_args['top_p'], top_k=-1, max_tokens=default_sampling_args['max_new_tokens'],
            stop=stop_tokens)
        outputs = model.generate(prompt_token_ids=inputs, sampling_params=sampling_params, lora_request=kwargs['lora_request'])

        for j, output in enumerate(outputs):
            if dones[j]:
                continue
            
            for tool in tools_enabled:
                if output.outputs[0].stop_reason == tool['end_token'] and tool['start_token'] in output.outputs[0].text:
                    api_args = output.outputs[0].text.split(tool['start_token'])[1].strip()
                    api_result = tool['api'](api_args)
                    responses[j] += f"{tool['start_token']} " + api_args + f" {tool['end_token']}" + api_result
                    api_result_tokens = tokenizer.encode(api_result, return_tensors=None)
                    inputs[j] += list(output.outputs[0].token_ids) + api_result_tokens

                    tool_used[j] = True
                    how_many_tool_calls[j] += 1
                    break # Only one tool can be used at a time
            if output.outputs[0].finish_reason == "stop" and output.outputs[0].stop_reason is None:
                responses[j] += output.outputs[0].text
                dones[j] = True
            elif output.outputs[0].stop_reason not in stop_tokens:
                # print(f"Unexpected finish reason: {output.outputs[0].finish_reason} {output.outputs[0].stop_reason}")
                responses[j] += output.outputs[0].text

                dones[j] = True

    return responses, tool_used, how_many_tool_calls

import re

def extract_answer(response, transform_fn = lambda x: x, nan_val = None)->str|None:
    ans = re.match('.*?<answer>(.*?)</answer>\s*$', response, re.DOTALL|re.MULTILINE)
    if ans:
        try:
            return transform_fn(ans[1].strip())
        except:
            return nan_val
    return nan_val

def evaluate_model(model, tokenizer, dataloader, actions_num=1, lora_request=None, tools=None):
    sum_bleu = 0
    num_samples = 0
    tools_used_in_total = 0
    calls_per_sample = 0
    
    with torch.no_grad():
        for batch in tqdm(dataloader):
            inputs, targets = batch

            # Generate translations
            outputs, tools_used, how_many_tool_calls = generate_batch_completion(model, tokenizer, inputs, actions_num=actions_num, lora_request=lora_request, tools=tools)

            tools_used_in_total += sum(tools_used)
            calls_per_sample += sum(how_many_tool_calls)

            generated_translations = [
                extract_answer(output, transform_fn=lambda x: x.strip(), nan_val='')
                for output in outputs
            ]
            # Calculate BLEU scores
            bleu_scores = get_rewards_translation(generated_translations, targets)
            
            sum_bleu += sum(bleu_scores)
            num_samples += len(bleu_scores)
    avg_bleu = sum_bleu / num_samples if num_samples > 0 else 0
    tools_used_avg = tools_used_in_total / num_samples
    calls_per_sample_avg = calls_per_sample / tools_used_in_total
    return avg_bleu, tools_used_avg, calls_per_sample_avg

  from .autonotebook import tqdm as notebook_tqdm
2025-05-09 20:27:27,712	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


### tools

In [3]:
def spa_to_wayu_dictionary(spanish_word, max_matches=5):
    dictionary_path = 'assets/spanish_to_wayuunaiki_short.csv'

    with open(dictionary_path, 'r', encoding='utf-8') as f:
        all_matches = []
        line = f.readline()
        while line != '' and len(all_matches) < max_matches:
            data = line.strip().split(',')
            if re.search(rf'\b{re.escape(spanish_word)}\b', data[0], re.IGNORECASE):
                all_matches.append(data)
            line = f.readline()

    if len(all_matches) > 0:
        result = " <matches> " + '\n'.join(f'{spa}: {wayuu}' for spa, wayuu in all_matches) + " </matches>"
        # print(f'CORRECT USE OF SPA_TO_WAYU TOOL. Word: {spanish_word}, Result: {result}')
    else:
        result = " <matches> No matches found </matches>"
        # print(f'NO_MATCHES SPA_TO_WAYU TOOL. Word: {spanish_word}')

    return result

TOOLS = [
    {
        'name': 'spa_to_wayu',
        'description': 'A tool that translates a word from Spanish to Wayuunaiki.',
        'api': spa_to_wayu_dictionary,
        'start_token': '<spa_to_wayuu>',
        'end_token': '</spa_to_wayuu>',
    }
]

### Model with SFT

In [4]:
# from pretrained peft model
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest


def load_model(model_name_or_path, peft_model_id):
    # Load the base model
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto", torch_dtype='auto')
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

    # Load the PEFT model
    peft_model = PeftModel.from_pretrained(model, peft_model_id)
    
    return peft_model, tokenizer

model, tokenizer = load_model("Qwen/Qwen2.5-0.5B-Instruct", 'models/sft_base_qwen')
vllm_lora_adapter = 'models/sft_base_qwen'
base_model_name = "Qwen/Qwen2.5-0.5B-Instruct"

inference_engine = LLM(
    model=base_model_name,
    enable_lora=True,
    max_lora_rank=64,
    max_loras=1,
    gpu_memory_utilization=0.2,
    # enable_prefix_caching=True,
    swap_space=6,
    scheduling_policy="fcfs",
    dtype=torch.bfloat16,
    max_model_len=768,
    # enable_sleep_mode=True,
    )

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Evaluate the model
avg_bleu, tools_used_avg, calls_per_sample_avg = evaluate_model(inference_engine, tokenizer, dataloader, actions_num=4, lora_request=LoRARequest('adapter', 1, vllm_lora_adapter), tools=TOOLS)
print(f"Average BLEU score: {avg_bleu:.4f}")
print(f"Average tools used: {tools_used_avg:.4f}")
print(f"Average calls per sample: {calls_per_sample_avg:.4f}")

[2025-05-09 20:28:52,189] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


INFO 05-09 20:28:55 __init__.py:207] Automatically detected platform cuda.
INFO 05-09 20:29:02 config.py:549] This model supports multiple tasks: {'reward', 'generate', 'classify', 'embed', 'score'}. Defaulting to 'generate'.
INFO 05-09 20:29:02 llm_engine.py:234] Initializing a V0 LLM engine (v0.7.3) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=Fal

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]
Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:00<00:00,  3.32it/s]
Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:00<00:00,  3.31it/s]


INFO 05-09 20:29:05 model_runner.py:1115] Loading model weights took 0.9254 GB
INFO 05-09 20:29:05 punica_selector.py:18] Using PunicaWrapperGPU.





INFO 05-09 20:29:06 worker.py:267] Memory profiling takes 1.01 seconds
INFO 05-09 20:29:06 worker.py:267] the current vLLM instance can use total_gpu_memory (11.99GiB) x gpu_memory_utilization (0.20) = 2.40GiB
INFO 05-09 20:29:06 worker.py:267] model weights take 0.93GiB; non_torch_memory takes 0.02GiB; PyTorch activation peak memory takes 1.39GiB; the rest of the memory reserved for KV Cache is 0.07GiB.
INFO 05-09 20:29:06 executor_base.py:111] # cuda blocks: 375, # CPU blocks: 32768
INFO 05-09 20:29:06 executor_base.py:116] Maximum concurrency for 768 tokens per request: 7.81x
INFO 05-09 20:29:06 model_runner.py:1434] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as nee

Capturing CUDA graph shapes: 100%|██████████| 35/35 [00:11<00:00,  2.99it/s]

INFO 05-09 20:29:18 model_runner.py:1562] Graph capturing finished in 12 secs, took 0.31 GiB
INFO 05-09 20:29:18 llm_engine.py:436] init engine (profile, create kv cache, warmup model) took 13.32 seconds



  0%|          | 0/104 [00:00<?, ?it/s]



  outputs, tools_used, how_many_tool_calls = generate_batch_completion(model, tokenizer, inputs, actions_num=actions_num, lora_request=lora_request, tools=tools)




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.42it/s, est. speed input: 1616.22 toks/s, output: 1066.17 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.98it/s, est. speed input: 1231.09 toks/s, output: 1202.66 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.61it/s, est. speed input: 1871.90 toks/s, output: 1086.96 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.38it/s, est. speed input: 1910.52 toks/s, output: 1149.50 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.09it/s, est. speed input: 1422.36 toks/s, output: 1187.61 toks/s]
  1%|          | 1/104 [00:44<1:15:56, 44.23s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.94it/s, est. speed input: 1205.12 toks/s, output: 1169.40 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.52it/s, est. speed input: 1359.84 toks/s, output: 1091.80 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.98it/s, est. speed input: 1278.16 toks/s, output: 1210.89 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.08it/s, est. speed input: 1331.15 toks/s, output: 1177.68 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.18it/s, est. speed input: 1359.99 toks/s, output: 1167.90 toks/s]
  2%|▏         | 2/104 [01:36<1:23:34, 49.17s/it]



Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.70it/s, est. speed input: 1121.17 toks/s, output: 1165.05 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.66it/s, est. speed input: 1113.59 toks/s, output: 1149.75 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:20<00:00,  3.19it/s, est. speed input: 976.58 toks/s, output: 1089.96 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.42it/s, est. speed input: 1046.35 toks/s, output: 1121.44 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.76it/s, est. speed input: 1153.10 toks/s, output: 1133.22 toks/s]
  3%|▎         | 3/104 [03:07<1:54:39, 68.11s/it]



Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.79it/s, est. speed input: 1662.17 toks/s, output: 1149.75 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.85it/s, est. speed input: 1633.88 toks/s, output: 1146.81 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:05<00:00, 11.60it/s, est. speed input: 2588.70 toks/s, output: 1008.77 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:06<00:00,  9.74it/s, est. speed input: 2282.44 toks/s, output: 1073.01 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.92it/s, est. speed input: 1660.42 toks/s, output: 1128.84 toks/s]
  4%|▍         | 4/104 [03:44<1:33:15, 55.96s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.05it/s, est. speed input: 1191.10 toks/s, output: 1190.62 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.85it/s, est. speed input: 1376.38 toks/s, output: 1053.71 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.57it/s, est. speed input: 1341.47 toks/s, output: 1038.35 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.24it/s, est. speed input: 1509.81 toks/s, output: 1082.91 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.47it/s, est. speed input: 1352.65 toks/s, output: 1066.01 toks/s]
  5%|▍         | 5/104 [04:33<1:27:54, 53.28s/it]



Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.47it/s, est. speed input: 1479.79 toks/s, output: 1088.35 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.53it/s, est. speed input: 1333.65 toks/s, output: 1031.74 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.45it/s, est. speed input: 1382.97 toks/s, output: 1046.91 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.41it/s, est. speed input: 1408.42 toks/s, output: 1112.30 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.44it/s, est. speed input: 1665.25 toks/s, output: 1127.17 toks/s]
  6%|▌         | 6/104 [05:20<1:23:47, 51.30s/it]



Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.81it/s, est. speed input: 1015.43 toks/s, output: 1176.88 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.29it/s, est. speed input: 912.57 toks/s, output: 1262.71 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.37it/s, est. speed input: 1143.91 toks/s, output: 1229.95 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.80it/s, est. speed input: 1244.24 toks/s, output: 1187.12 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.77it/s, est. speed input: 1237.87 toks/s, output: 1170.93 toks/s]
  7%|▋         | 7/104 [06:23<1:28:46, 54.91s/it]



Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.76it/s, est. speed input: 929.50 toks/s, output: 1151.06 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.76it/s, est. speed input: 932.71 toks/s, output: 1146.40 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.48it/s, est. speed input: 868.18 toks/s, output: 1146.12 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:21<00:00,  3.04it/s, est. speed input: 762.84 toks/s, output: 1152.49 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.08it/s, est. speed input: 1027.49 toks/s, output: 1183.46 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.85it/s, est. speed input: 1062.61 toks/s, output: 1106.96 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.39it/s, est. speed input: 1197.34 toks/s, output: 1121.03 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.58it/s, est. speed input: 1032.38 toks/s, output: 1088.54 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.45it/s, est. speed input: 1010.09 toks/s, output: 1178.49 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.50it/s, est. speed input: 1024.33 toks/s, output: 1198.69 toks/s]
  9%|▊         | 9/104 [09:00<1:45:13, 66.45s/it]



Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.73it/s, est. speed input: 1469.29 toks/s, output: 1146.91 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.66it/s, est. speed input: 1744.65 toks/s, output: 1130.23 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.63it/s, est. speed input: 1367.24 toks/s, output: 1064.45 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.85it/s, est. speed input: 1847.88 toks/s, output: 1102.51 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:06<00:00,  9.90it/s, est. speed input: 2077.57 toks/s, output: 1097.16 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.81it/s, est. speed input: 1208.15 toks/s, output: 1208.78 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.00it/s, est. speed input: 1045.11 toks/s, output: 1109.99 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.04it/s, est. speed input: 1262.28 toks/s, output: 1140.07 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.29it/s, est. speed input: 897.45 toks/s, output: 1175.48 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.32it/s, est. speed input: 1111.82 toks/s, output: 1163.14 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.92it/s, est. speed input: 1225.78 toks/s, output: 1176.37 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.73it/s, est. speed input: 1213.31 toks/s, output: 1181.95 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.53it/s, est. speed input: 1411.77 toks/s, output: 1091.36 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.94it/s, est. speed input: 1319.93 toks/s, output: 1260.96 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.95it/s, est. speed input: 1349.82 toks/s, output: 1136.18 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.89it/s, est. speed input: 1302.92 toks/s, output: 1041.92 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.55it/s, est. speed input: 1706.14 toks/s, output: 1168.32 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.16it/s, est. speed input: 1708.24 toks/s, output: 1129.77 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.92it/s, est. speed input: 1958.60 toks/s, output: 1050.74 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.81it/s, est. speed input: 1965.81 toks/s, output: 1070.06 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.66it/s, est. speed input: 1182.39 toks/s, output: 1130.23 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.63it/s, est. speed input: 996.92 toks/s, output: 1110.67 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.93it/s, est. speed input: 1279.24 toks/s, output: 1154.69 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.81it/s, est. speed input: 1044.52 toks/s, output: 1147.69 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.58it/s, est. speed input: 997.04 toks/s, output: 1137.76 toks/s]
 13%|█▎        | 14/104 [13:17<1:23:09, 55.44s/it]



Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.71it/s, est. speed input: 1096.88 toks/s, output: 1148.01 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.76it/s, est. speed input: 1118.53 toks/s, output: 1141.93 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:20<00:00,  3.08it/s, est. speed input: 920.60 toks/s, output: 1077.72 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:20<00:00,  3.07it/s, est. speed input: 917.28 toks/s, output: 1091.87 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.64it/s, est. speed input: 1087.89 toks/s, output: 1164.69 toks/s]
 14%|█▍        | 15/104 [14:51<1:39:16, 66.93s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.45it/s, est. speed input: 1258.07 toks/s, output: 1122.69 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.32it/s, est. speed input: 1264.47 toks/s, output: 1091.07 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:06<00:00,  9.25it/s, est. speed input: 1888.41 toks/s, output: 1136.86 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.27it/s, est. speed input: 1709.82 toks/s, output: 1142.16 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:06<00:00,  9.43it/s, est. speed input: 1979.17 toks/s, output: 1064.60 toks/s]
 15%|█▌        | 16/104 [15:32<1:27:05, 59.38s/it]



Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.05it/s, est. speed input: 1056.37 toks/s, output: 1163.93 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.88it/s, est. speed input: 1235.71 toks/s, output: 1196.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.01it/s, est. speed input: 1269.65 toks/s, output: 1154.17 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.71it/s, est. speed input: 1218.44 toks/s, output: 1212.29 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.23it/s, est. speed input: 1119.31 toks/s, output: 1166.79 toks/s]
 16%|█▋        | 17/104 [16:30<1:25:27, 58.93s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.25it/s, est. speed input: 1225.38 toks/s, output: 1109.72 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.36it/s, est. speed input: 1509.45 toks/s, output: 1102.14 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.87it/s, est. speed input: 1445.27 toks/s, output: 1079.84 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.86it/s, est. speed input: 1491.22 toks/s, output: 1100.75 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.81it/s, est. speed input: 1939.00 toks/s, output: 1087.77 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.16it/s, est. speed input: 1207.27 toks/s, output: 1153.08 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.29it/s, est. speed input: 1263.79 toks/s, output: 1152.61 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.86it/s, est. speed input: 1819.06 toks/s, output: 1106.86 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.86it/s, est. speed input: 1664.08 toks/s, output: 1143.01 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.53it/s, est. speed input: 1393.95 toks/s, output: 1073.33 toks/s]
 18%|█▊        | 19/104 [18:02<1:13:54, 52.18s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.98it/s, est. speed input: 1226.43 toks/s, output: 1196.23 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.49it/s, est. speed input: 1342.30 toks/s, output: 1017.26 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.16it/s, est. speed input: 1283.75 toks/s, output: 1180.77 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.45it/s, est. speed input: 1142.13 toks/s, output: 1184.43 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.98it/s, est. speed input: 1252.56 toks/s, output: 1192.89 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:06<00:00,  9.39it/s, est. speed input: 1794.07 toks/s, output: 1139.78 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  9.12it/s, est. speed input: 1846.27 toks/s, output: 1102.89 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.66it/s, est. speed input: 1397.25 toks/s, output: 1079.06 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  7.05it/s, est. speed input: 1538.11 toks/s, output: 1084.52 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  7.03it/s, est. speed input: 1574.65 toks/s, output: 1054.19 toks/s]
 20%|██        | 21/104 [19:37<1:08:23, 49.44s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.27it/s, est. speed input: 1235.02 toks/s, output: 1115.51 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.67it/s, est. speed input: 1523.76 toks/s, output: 1134.79 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.70it/s, est. speed input: 1330.90 toks/s, output: 1008.54 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.45it/s, est. speed input: 1281.67 toks/s, output: 1041.53 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.03it/s, est. speed input: 1198.02 toks/s, output: 1187.56 toks/s]
 21%|██        | 22/104 [20:26<1:07:15, 49.22s/it]



Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.25it/s, est. speed input: 1443.76 toks/s, output: 1109.01 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.32it/s, est. speed input: 1478.66 toks/s, output: 1066.80 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.74it/s, est. speed input: 1381.98 toks/s, output: 1043.64 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.06it/s, est. speed input: 1251.15 toks/s, output: 1163.09 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.06it/s, est. speed input: 1258.64 toks/s, output: 1146.74 toks/s]
 22%|██▏       | 23/104 [21:14<1:06:03, 48.93s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.47it/s, est. speed input: 1187.52 toks/s, output: 1132.28 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.37it/s, est. speed input: 1174.58 toks/s, output: 1159.04 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.54it/s, est. speed input: 996.40 toks/s, output: 1131.65 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.00it/s, est. speed input: 1097.86 toks/s, output: 1140.20 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.47it/s, est. speed input: 980.57 toks/s, output: 1156.95 toks/s]
 23%|██▎       | 24/104 [22:19<1:11:37, 53.72s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.74it/s, est. speed input: 1171.62 toks/s, output: 1134.91 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.66it/s, est. speed input: 949.80 toks/s, output: 1151.31 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.41it/s, est. speed input: 1307.61 toks/s, output: 1008.35 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.81it/s, est. speed input: 1184.44 toks/s, output: 1168.39 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.41it/s, est. speed input: 1307.07 toks/s, output: 1134.68 toks/s]
 24%|██▍       | 25/104 [23:15<1:11:36, 54.38s/it]



Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.61it/s, est. speed input: 1161.09 toks/s, output: 1116.80 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:16<00:00,  3.90it/s, est. speed input: 1254.96 toks/s, output: 1146.38 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:19<00:00,  3.29it/s, est. speed input: 1060.89 toks/s, output: 1105.88 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:19<00:00,  3.30it/s, est. speed input: 1061.04 toks/s, output: 1103.31 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.48it/s, est. speed input: 1120.30 toks/s, output: 1105.51 toks/s]
 25%|██▌       | 26/104 [24:46<1:25:09, 65.51s/it]



Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.65it/s, est. speed input: 832.95 toks/s, output: 1192.86 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.68it/s, est. speed input: 842.65 toks/s, output: 1180.04 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.38it/s, est. speed input: 1005.86 toks/s, output: 1211.54 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.34it/s, est. speed input: 997.19 toks/s, output: 1207.26 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.21it/s, est. speed input: 967.62 toks/s, output: 1150.40 toks/s]
 26%|██▌       | 27/104 [26:06<1:29:28, 69.72s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.17it/s, est. speed input: 1234.42 toks/s, output: 1120.62 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.59it/s, est. speed input: 1575.36 toks/s, output: 1102.39 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.53it/s, est. speed input: 1397.90 toks/s, output: 1094.95 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.72it/s, est. speed input: 1474.42 toks/s, output: 1056.71 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.49it/s, est. speed input: 1454.87 toks/s, output: 1155.49 toks/s]
 27%|██▋       | 28/104 [26:54<1:20:14, 63.34s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.75it/s, est. speed input: 1202.34 toks/s, output: 1185.17 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.72it/s, est. speed input: 1218.99 toks/s, output: 1222.39 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.44it/s, est. speed input: 1180.55 toks/s, output: 1177.66 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.67it/s, est. speed input: 1246.83 toks/s, output: 1166.06 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.14it/s, est. speed input: 1352.14 toks/s, output: 1116.94 toks/s]
 28%|██▊       | 29/104 [27:50<1:16:24, 61.13s/it]



Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.38it/s, est. speed input: 981.84 toks/s, output: 1172.51 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.53it/s, est. speed input: 1022.66 toks/s, output: 1107.53 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.44it/s, est. speed input: 1007.28 toks/s, output: 1140.89 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.44it/s, est. speed input: 1240.35 toks/s, output: 1172.18 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.25it/s, est. speed input: 978.75 toks/s, output: 1153.74 toks/s]
 29%|██▉       | 30/104 [29:00<1:18:42, 63.81s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.66it/s, est. speed input: 1217.00 toks/s, output: 1152.87 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.51it/s, est. speed input: 997.82 toks/s, output: 1151.76 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.46it/s, est. speed input: 1225.90 toks/s, output: 1208.75 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.36it/s, est. speed input: 1208.17 toks/s, output: 1154.23 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.97it/s, est. speed input: 1366.36 toks/s, output: 1170.24 toks/s]
 30%|██▉       | 31/104 [30:01<1:16:18, 62.71s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.83it/s, est. speed input: 1212.17 toks/s, output: 1153.98 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.18it/s, est. speed input: 1303.03 toks/s, output: 1156.44 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.68it/s, est. speed input: 992.96 toks/s, output: 1120.18 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.00it/s, est. speed input: 1281.41 toks/s, output: 1218.98 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.17it/s, est. speed input: 1328.00 toks/s, output: 1082.44 toks/s]
 31%|███       | 32/104 [30:57<1:12:53, 60.75s/it]



Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.30it/s, est. speed input: 1394.37 toks/s, output: 1058.55 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.12it/s, est. speed input: 1204.35 toks/s, output: 1082.49 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.39it/s, est. speed input: 1299.25 toks/s, output: 1139.94 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  7.02it/s, est. speed input: 1457.66 toks/s, output: 1099.39 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.84it/s, est. speed input: 1871.86 toks/s, output: 1087.55 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.21it/s, est. speed input: 1272.32 toks/s, output: 1170.01 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.44it/s, est. speed input: 1351.91 toks/s, output: 1080.06 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.03it/s, est. speed input: 1311.96 toks/s, output: 1177.76 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.75it/s, est. speed input: 1753.20 toks/s, output: 1088.56 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.25it/s, est. speed input: 1455.55 toks/s, output: 1182.71 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.95it/s, est. speed input: 1581.43 toks/s, output: 1128.09 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.90it/s, est. speed input: 1206.28 toks/s, output: 1212.36 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.25it/s, est. speed input: 1314.36 toks/s, output: 1082.13 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.87it/s, est. speed input: 1472.38 toks/s, output: 1041.22 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.61it/s, est. speed input: 1649.59 toks/s, output: 1116.64 toks/s]
 34%|███▎      | 35/104 [33:19<59:55, 52.11s/it]  



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.39it/s, est. speed input: 1088.58 toks/s, output: 1196.19 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.24it/s, est. speed input: 1299.07 toks/s, output: 1110.29 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.43it/s, est. speed input: 1356.20 toks/s, output: 1133.43 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.41it/s, est. speed input: 1589.98 toks/s, output: 1138.05 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.34it/s, est. speed input: 1359.90 toks/s, output: 1143.93 toks/s]
 35%|███▍      | 36/104 [34:10<58:41, 51.78s/it]



Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.75it/s, est. speed input: 988.42 toks/s, output: 1106.85 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.48it/s, est. speed input: 1373.71 toks/s, output: 1084.24 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.11it/s, est. speed input: 1310.26 toks/s, output: 1174.27 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.69it/s, est. speed input: 1242.13 toks/s, output: 1244.62 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.51it/s, est. speed input: 1430.54 toks/s, output: 1117.06 toks/s]
 36%|███▌      | 37/104 [35:05<58:54, 52.75s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.33it/s, est. speed input: 1329.01 toks/s, output: 1098.91 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.93it/s, est. speed input: 1262.50 toks/s, output: 1181.87 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.83it/s, est. speed input: 1039.38 toks/s, output: 1180.85 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.17it/s, est. speed input: 1344.24 toks/s, output: 1206.75 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.51it/s, est. speed input: 1209.01 toks/s, output: 1192.81 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.76it/s, est. speed input: 1197.77 toks/s, output: 1205.60 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.04it/s, est. speed input: 1317.58 toks/s, output: 1147.38 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.68it/s, est. speed input: 1269.65 toks/s, output: 1188.32 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.16it/s, est. speed input: 1412.07 toks/s, output: 1153.80 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.45it/s, est. speed input: 1495.71 toks/s, output: 1118.01 toks/s]
 38%|███▊      | 39/104 [36:56<58:22, 53.88s/it]



Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.65it/s, est. speed input: 926.98 toks/s, output: 1119.89 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.51it/s, est. speed input: 890.92 toks/s, output: 1118.08 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.44it/s, est. speed input: 874.93 toks/s, output: 1171.59 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:19<00:00,  3.36it/s, est. speed input: 852.60 toks/s, output: 1177.72 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:19<00:00,  3.36it/s, est. speed input: 852.50 toks/s, output: 1197.04 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.80it/s, est. speed input: 1247.49 toks/s, output: 1203.61 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.67it/s, est. speed input: 1018.22 toks/s, output: 1109.75 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.14it/s, est. speed input: 909.88 toks/s, output: 1198.05 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.90it/s, est. speed input: 1086.52 toks/s, output: 1157.71 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.46it/s, est. speed input: 997.99 toks/s, output: 1136.29 toks/s]
 39%|███▉      | 41/104 [39:36<1:09:28, 66.17s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.42it/s, est. speed input: 1265.17 toks/s, output: 1096.08 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.51it/s, est. speed input: 1313.03 toks/s, output: 1102.13 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.19it/s, est. speed input: 1291.65 toks/s, output: 1137.15 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.28it/s, est. speed input: 1789.98 toks/s, output: 1129.17 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.62it/s, est. speed input: 1455.33 toks/s, output: 1045.92 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.39it/s, est. speed input: 1272.16 toks/s, output: 1086.66 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.27it/s, est. speed input: 1073.27 toks/s, output: 1185.59 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.56it/s, est. speed input: 1361.41 toks/s, output: 1126.90 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.39it/s, est. speed input: 1767.75 toks/s, output: 1150.84 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.88it/s, est. speed input: 1461.47 toks/s, output: 1114.45 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.37it/s, est. speed input: 1241.60 toks/s, output: 1164.20 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  9.10it/s, est. speed input: 1882.92 toks/s, output: 1058.62 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.25it/s, est. speed input: 1325.70 toks/s, output: 1184.27 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.81it/s, est. speed input: 1699.13 toks/s, output: 1068.23 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.43it/s, est. speed input: 1847.00 toks/s, output: 1138.25 toks/s]
 42%|████▏     | 44/104 [41:57<53:07, 53.13s/it]



Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.52it/s, est. speed input: 867.15 toks/s, output: 1121.28 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.42it/s, est. speed input: 1095.46 toks/s, output: 1164.98 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.59it/s, est. speed input: 894.42 toks/s, output: 1116.79 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.52it/s, est. speed input: 876.26 toks/s, output: 1135.84 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.74it/s, est. speed input: 931.36 toks/s, output: 1175.02 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.37it/s, est. speed input: 1241.59 toks/s, output: 1147.77 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.58it/s, est. speed input: 1319.96 toks/s, output: 1103.03 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.37it/s, est. speed input: 1323.48 toks/s, output: 1172.10 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.45it/s, est. speed input: 1581.36 toks/s, output: 1063.90 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.89it/s, est. speed input: 1693.11 toks/s, output: 1072.90 toks/s]
 44%|████▍     | 46/104 [44:09<56:11, 58.12s/it]  



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.35it/s, est. speed input: 1250.47 toks/s, output: 1068.97 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.54it/s, est. speed input: 1339.94 toks/s, output: 1002.42 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.34it/s, est. speed input: 1315.59 toks/s, output: 1128.34 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.79it/s, est. speed input: 1221.14 toks/s, output: 1204.12 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.28it/s, est. speed input: 1761.70 toks/s, output: 1097.37 toks/s]
 45%|████▌     | 47/104 [44:58<52:36, 55.38s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.86it/s, est. speed input: 1207.90 toks/s, output: 1138.63 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.20it/s, est. speed input: 1306.35 toks/s, output: 1121.72 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.54it/s, est. speed input: 1384.66 toks/s, output: 1099.55 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.31it/s, est. speed input: 1128.07 toks/s, output: 1129.48 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.44it/s, est. speed input: 1165.07 toks/s, output: 1211.08 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.27it/s, est. speed input: 1424.37 toks/s, output: 1126.64 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.77it/s, est. speed input: 1583.47 toks/s, output: 1090.14 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.91it/s, est. speed input: 1652.89 toks/s, output: 1195.19 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.23it/s, est. speed input: 1780.34 toks/s, output: 1112.32 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.33it/s, est. speed input: 1398.32 toks/s, output: 1111.34 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.63it/s, est. speed input: 1332.99 toks/s, output: 1087.10 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.36it/s, est. speed input: 1314.36 toks/s, output: 1110.48 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.48it/s, est. speed input: 1149.33 toks/s, output: 1181.97 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.29it/s, est. speed input: 1540.10 toks/s, output: 1093.35 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.94it/s, est. speed input: 1272.60 toks/s, output: 1169.53 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.28it/s, est. speed input: 1255.31 toks/s, output: 1096.23 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.74it/s, est. speed input: 1371.41 toks/s, output: 1098.56 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.34it/s, est. speed input: 1715.03 toks/s, output: 1121.10 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.29it/s, est. speed input: 1292.94 toks/s, output: 1106.76 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.77it/s, est. speed input: 1399.23 toks/s, output: 1023.80 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.05it/s, est. speed input: 1247.42 toks/s, output: 1078.24 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.00it/s, est. speed input: 1265.42 toks/s, output: 1197.01 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.99it/s, est. speed input: 1285.14 toks/s, output: 1130.33 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.01it/s, est. speed input: 1085.89 toks/s, output: 1172.16 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.32it/s, est. speed input: 1387.02 toks/s, output: 1190.02 toks/s]
 50%|█████     | 52/104 [49:10<44:47, 51.69s/it]



Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.54it/s, est. speed input: 981.01 toks/s, output: 1163.68 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.53it/s, est. speed input: 1215.52 toks/s, output: 1220.36 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.84it/s, est. speed input: 1065.66 toks/s, output: 1140.39 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.10it/s, est. speed input: 1133.10 toks/s, output: 1141.96 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.89it/s, est. speed input: 1084.53 toks/s, output: 1121.48 toks/s]
 51%|█████     | 53/104 [50:15<47:15, 55.59s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.52it/s, est. speed input: 1362.27 toks/s, output: 1033.11 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.81it/s, est. speed input: 1227.65 toks/s, output: 1165.11 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.41it/s, est. speed input: 1154.64 toks/s, output: 1155.65 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.13it/s, est. speed input: 1319.98 toks/s, output: 1145.54 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.26it/s, est. speed input: 1362.63 toks/s, output: 1137.98 toks/s]
 52%|█████▏    | 54/104 [51:08<45:49, 54.99s/it]



Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.22it/s, est. speed input: 1619.05 toks/s, output: 1157.66 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.50it/s, est. speed input: 1336.44 toks/s, output: 1088.59 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  9.02it/s, est. speed input: 1912.24 toks/s, output: 1176.76 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.46it/s, est. speed input: 1406.46 toks/s, output: 1086.08 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.76it/s, est. speed input: 1475.01 toks/s, output: 1119.65 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.24it/s, est. speed input: 1279.45 toks/s, output: 1179.30 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.39it/s, est. speed input: 1364.50 toks/s, output: 1133.02 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.22it/s, est. speed input: 1354.91 toks/s, output: 1140.20 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.85it/s, est. speed input: 1314.33 toks/s, output: 1163.49 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.14it/s, est. speed input: 1406.06 toks/s, output: 1149.03 toks/s]
 54%|█████▍    | 56/104 [52:45<41:37, 52.02s/it]



Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2611.62it/s, est. speed input: 5370281.68 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2619.90it/s, est. speed input: 5386480.04 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2009.77it/s, est. speed input: 4143714.63 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2237.24it/s, est. speed input: 4598672.71 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2369.73it/s, est. speed input: 4871221.64 toks/s, output: 0.00 toks/s]
 55%|█████▍    | 57/104 [52:46<28:34, 36.49s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.46it/s, est. speed input: 1108.31 toks/s, output: 1199.76 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.25it/s, est. speed input: 1724.64 toks/s, output: 1102.71 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.57it/s, est. speed input: 1624.39 toks/s, output: 1133.17 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.21it/s, est. speed input: 1354.75 toks/s, output: 1092.32 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.30it/s, est. speed input: 1400.47 toks/s, output: 1150.94 toks/s]
 56%|█████▌    | 58/104 [53:34<30:45, 40.11s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.66it/s, est. speed input: 1279.71 toks/s, output: 1128.08 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.63it/s, est. speed input: 1490.13 toks/s, output: 1160.91 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.90it/s, est. speed input: 1367.96 toks/s, output: 1093.74 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.67it/s, est. speed input: 1344.43 toks/s, output: 1025.80 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.73it/s, est. speed input: 1793.40 toks/s, output: 1112.23 toks/s]
 57%|█████▋    | 59/104 [54:19<31:07, 41.51s/it]



Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.27it/s, est. speed input: 1453.75 toks/s, output: 1093.72 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.87it/s, est. speed input: 1462.36 toks/s, output: 1048.67 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.65it/s, est. speed input: 1423.39 toks/s, output: 1078.17 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.86it/s, est. speed input: 1909.11 toks/s, output: 1112.77 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.88it/s, est. speed input: 1927.42 toks/s, output: 1088.45 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.57it/s, est. speed input: 1287.87 toks/s, output: 1094.34 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.90it/s, est. speed input: 1190.91 toks/s, output: 1250.62 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.11it/s, est. speed input: 1267.62 toks/s, output: 1192.38 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.08it/s, est. speed input: 1280.19 toks/s, output: 1229.70 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.08it/s, est. speed input: 1299.36 toks/s, output: 1161.00 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.42it/s, est. speed input: 994.18 toks/s, output: 1100.15 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.37it/s, est. speed input: 990.85 toks/s, output: 1170.49 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.16it/s, est. speed input: 951.20 toks/s, output: 1187.80 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.47it/s, est. speed input: 1029.46 toks/s, output: 1162.37 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:16<00:00,  4.00it/s, est. speed input: 921.21 toks/s, output: 1190.76 toks/s]
 60%|█████▉    | 62/104 [57:09<37:46, 53.97s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.39it/s, est. speed input: 1120.88 toks/s, output: 1144.62 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.11it/s, est. speed input: 1689.64 toks/s, output: 1075.41 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.69it/s, est. speed input: 1185.00 toks/s, output: 1169.28 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.22it/s, est. speed input: 1308.22 toks/s, output: 1083.98 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.42it/s, est. speed input: 1358.23 toks/s, output: 1028.15 toks/s]
 61%|██████    | 63/104 [58:00<36:20, 53.19s/it]



Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2732.31it/s, est. speed input: 4151671.56 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2516.65it/s, est. speed input: 3823176.24 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2710.95it/s, est. speed input: 4119520.35 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2388.15it/s, est. speed input: 3627344.28 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:00<00:00, 2021.75it/s, est. speed input: 3078197.07 toks/s, output: 0.00 toks/s]
 62%|██████▏   | 64/104 [58:00<24:51, 37.29s/it]



Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.06it/s, est. speed input: 1063.58 toks/s, output: 1184.66 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.19it/s, est. speed input: 1102.14 toks/s, output: 1147.11 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.48it/s, est. speed input: 1178.37 toks/s, output: 1194.98 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.18it/s, est. speed input: 1121.87 toks/s, output: 1178.35 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.14it/s, est. speed input: 1330.51 toks/s, output: 1182.42 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.73it/s, est. speed input: 1145.84 toks/s, output: 1212.80 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.61it/s, est. speed input: 1136.48 toks/s, output: 1197.25 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.53it/s, est. speed input: 1179.89 toks/s, output: 1215.30 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.95it/s, est. speed input: 1716.35 toks/s, output: 1123.66 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.23it/s, est. speed input: 1369.20 toks/s, output: 1114.57 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.45it/s, est. speed input: 1277.69 toks/s, output: 1057.38 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.37it/s, est. speed input: 1283.34 toks/s, output: 1079.29 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.31it/s, est. speed input: 1699.78 toks/s, output: 1150.97 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.66it/s, est. speed input: 1590.32 toks/s, output: 1136.54 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.40it/s, est. speed input: 1340.25 toks/s, output: 1148.92 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.10it/s, est. speed input: 1708.81 toks/s, output: 1069.61 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.10it/s, est. speed input: 1712.84 toks/s, output: 1076.64 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.60it/s, est. speed input: 1507.94 toks/s, output: 1058.88 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.57it/s, est. speed input: 1502.49 toks/s, output: 1069.37 toks/s]








Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.65it/s, est. speed input: 1543.23 toks/s, output: 1003.96 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.32it/s, est. speed input: 1112.05 toks/s, output: 1146.88 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.43it/s, est. speed input: 1143.49 toks/s, output: 1128.23 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.12it/s, est. speed input: 1308.00 toks/s, output: 1159.86 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.00it/s, est. speed input: 1302.70 toks/s, output: 1204.56 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.58it/s, est. speed input: 1002.16 toks/s, output: 1108.49 toks/s]
 66%|██████▋   | 69/104 [1:03:03<33:59, 58.27s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.73it/s, est. speed input: 1243.24 toks/s, output: 1148.08 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.75it/s, est. speed input: 1044.55 toks/s, output: 1097.26 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.03it/s, est. speed input: 1348.74 toks/s, output: 1094.14 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.62it/s, est. speed input: 1043.25 toks/s, output: 1098.43 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.87it/s, est. speed input: 1107.89 toks/s, output: 1097.62 toks/s]
 67%|██████▋   | 70/104 [1:04:05<33:45, 59.57s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.63it/s, est. speed input: 1188.52 toks/s, output: 1228.74 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.86it/s, est. speed input: 1267.20 toks/s, output: 1181.43 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.37it/s, est. speed input: 1170.83 toks/s, output: 1174.69 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  4.96it/s, est. speed input: 1092.33 toks/s, output: 1125.52 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.75it/s, est. speed input: 1270.89 toks/s, output: 1218.79 toks/s]
 68%|██████▊   | 71/104 [1:05:04<32:36, 59.29s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.33it/s, est. speed input: 1240.05 toks/s, output: 1102.83 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  9.13it/s, est. speed input: 1834.02 toks/s, output: 1211.88 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.48it/s, est. speed input: 1321.95 toks/s, output: 1096.73 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.33it/s, est. speed input: 1308.90 toks/s, output: 1147.13 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.75it/s, est. speed input: 1615.10 toks/s, output: 1133.94 toks/s]
 69%|██████▉   | 72/104 [1:05:49<29:24, 55.15s/it]



Processed prompts: 100%|██████████| 64/64 [00:20<00:00,  3.12it/s, est. speed input: 856.06 toks/s, output: 1122.85 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.46it/s, est. speed input: 954.87 toks/s, output: 1166.46 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.57it/s, est. speed input: 983.63 toks/s, output: 1169.87 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:21<00:00,  2.95it/s, est. speed input: 813.07 toks/s, output: 1154.47 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:19<00:00,  3.29it/s, est. speed input: 908.66 toks/s, output: 1147.89 toks/s]
 70%|███████   | 73/104 [1:07:27<35:09, 68.04s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.49it/s, est. speed input: 1271.70 toks/s, output: 1085.97 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.66it/s, est. speed input: 1338.41 toks/s, output: 1019.30 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.83it/s, est. speed input: 1407.29 toks/s, output: 1020.35 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.25it/s, est. speed input: 1319.88 toks/s, output: 1077.13 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.37it/s, est. speed input: 1817.59 toks/s, output: 1162.91 toks/s]
 71%|███████   | 74/104 [1:08:14<30:51, 61.72s/it]



Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.22it/s, est. speed input: 911.99 toks/s, output: 1182.67 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.00it/s, est. speed input: 1087.73 toks/s, output: 1128.51 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.47it/s, est. speed input: 978.07 toks/s, output: 1170.82 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.49it/s, est. speed input: 989.59 toks/s, output: 1148.90 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.21it/s, est. speed input: 1149.78 toks/s, output: 1175.35 toks/s]
 72%|███████▏  | 75/104 [1:09:23<30:52, 63.86s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.50it/s, est. speed input: 1332.42 toks/s, output: 1072.02 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.74it/s, est. speed input: 1226.73 toks/s, output: 1186.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.65it/s, est. speed input: 1894.65 toks/s, output: 1105.47 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.43it/s, est. speed input: 1429.47 toks/s, output: 1090.50 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.02it/s, est. speed input: 1348.36 toks/s, output: 1129.61 toks/s]
 73%|███████▎  | 76/104 [1:10:12<27:44, 59.45s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.44it/s, est. speed input: 1275.02 toks/s, output: 1104.17 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.45it/s, est. speed input: 1332.42 toks/s, output: 1070.29 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.26it/s, est. speed input: 1326.93 toks/s, output: 1160.15 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.67it/s, est. speed input: 1439.08 toks/s, output: 1091.42 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.26it/s, est. speed input: 1377.88 toks/s, output: 1105.61 toks/s]
 74%|███████▍  | 77/104 [1:11:03<25:30, 56.68s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.88it/s, est. speed input: 1153.24 toks/s, output: 1241.96 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.58it/s, est. speed input: 1311.36 toks/s, output: 1096.36 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.48it/s, est. speed input: 1321.20 toks/s, output: 1084.16 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.32it/s, est. speed input: 1298.50 toks/s, output: 1186.33 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.53it/s, est. speed input: 1401.37 toks/s, output: 1155.32 toks/s]
 75%|███████▌  | 78/104 [1:11:54<23:49, 54.97s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.06it/s, est. speed input: 1261.22 toks/s, output: 1147.43 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.19it/s, est. speed input: 1310.59 toks/s, output: 1138.63 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.04it/s, est. speed input: 1291.66 toks/s, output: 1184.65 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.79it/s, est. speed input: 1239.76 toks/s, output: 1246.10 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.14it/s, est. speed input: 1325.15 toks/s, output: 1183.64 toks/s]
 76%|███████▌  | 79/104 [1:12:47<22:40, 54.43s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.72it/s, est. speed input: 1230.56 toks/s, output: 1202.30 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  4.92it/s, est. speed input: 1071.99 toks/s, output: 1158.02 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.55it/s, est. speed input: 993.31 toks/s, output: 1147.74 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.76it/s, est. speed input: 1047.73 toks/s, output: 1123.65 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.78it/s, est. speed input: 1280.79 toks/s, output: 1205.59 toks/s]
 77%|███████▋  | 80/104 [1:13:50<22:47, 56.97s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.46it/s, est. speed input: 1108.23 toks/s, output: 1184.15 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.85it/s, est. speed input: 1223.92 toks/s, output: 1205.19 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.95it/s, est. speed input: 1273.28 toks/s, output: 1180.93 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.75it/s, est. speed input: 1259.10 toks/s, output: 1169.41 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.87it/s, est. speed input: 1299.02 toks/s, output: 1191.91 toks/s]
 78%|███████▊  | 81/104 [1:14:45<21:41, 56.59s/it]



Processed prompts: 100%|██████████| 64/64 [00:06<00:00,  9.33it/s, est. speed input: 1745.51 toks/s, output: 1061.92 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:06<00:00,  9.93it/s, est. speed input: 1942.83 toks/s, output: 1134.29 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.47it/s, est. speed input: 1736.17 toks/s, output: 1056.21 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:06<00:00,  9.48it/s, est. speed input: 1980.24 toks/s, output: 1068.14 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.10it/s, est. speed input: 1738.02 toks/s, output: 1069.14 toks/s]
 79%|███████▉  | 82/104 [1:15:21<18:28, 50.40s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.24it/s, est. speed input: 1284.68 toks/s, output: 1143.29 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.13it/s, est. speed input: 1272.03 toks/s, output: 1163.63 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.35it/s, est. speed input: 1109.42 toks/s, output: 1161.00 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.79it/s, est. speed input: 1408.95 toks/s, output: 1049.28 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.96it/s, est. speed input: 1235.43 toks/s, output: 1109.29 toks/s]
 80%|███████▉  | 83/104 [1:16:14<17:53, 51.14s/it]



Processed prompts: 100%|██████████| 64/64 [00:19<00:00,  3.22it/s, est. speed input: 879.47 toks/s, output: 1135.17 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:21<00:00,  2.99it/s, est. speed input: 819.14 toks/s, output: 1111.43 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.50it/s, est. speed input: 963.73 toks/s, output: 1183.32 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:21<00:00,  2.99it/s, est. speed input: 827.50 toks/s, output: 1135.52 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:18<00:00,  3.53it/s, est. speed input: 977.86 toks/s, output: 1193.29 toks/s]
 81%|████████  | 84/104 [1:17:53<21:50, 65.53s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  7.07it/s, est. speed input: 1435.90 toks/s, output: 1127.54 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.76it/s, est. speed input: 1211.93 toks/s, output: 1218.86 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.87it/s, est. speed input: 1280.30 toks/s, output: 1146.87 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.30it/s, est. speed input: 1398.16 toks/s, output: 1179.66 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.35it/s, est. speed input: 1438.25 toks/s, output: 1094.16 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.76it/s, est. speed input: 1004.35 toks/s, output: 1120.59 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.21it/s, est. speed input: 1099.83 toks/s, output: 1180.05 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.11it/s, est. speed input: 1288.48 toks/s, output: 1179.90 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.52it/s, est. speed input: 1165.39 toks/s, output: 1195.59 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.45it/s, est. speed input: 938.55 toks/s, output: 1174.16 toks/s]
 83%|████████▎ | 86/104 [1:19:47<18:28, 61.60s/it]



Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.23it/s, est. speed input: 1067.55 toks/s, output: 1159.05 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.92it/s, est. speed input: 1224.02 toks/s, output: 1088.03 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.19it/s, est. speed input: 1287.74 toks/s, output: 1137.46 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.19it/s, est. speed input: 1303.75 toks/s, output: 1145.87 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.28it/s, est. speed input: 1322.72 toks/s, output: 1103.75 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:16<00:00,  3.91it/s, est. speed input: 871.03 toks/s, output: 1202.36 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.77it/s, est. speed input: 1071.35 toks/s, output: 1104.07 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.12it/s, est. speed input: 926.33 toks/s, output: 1171.50 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.41it/s, est. speed input: 995.91 toks/s, output: 1183.34 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.32it/s, est. speed input: 980.11 toks/s, output: 1198.58 toks/s]
 85%|████████▍ | 88/104 [1:21:56<17:03, 63.96s/it]



Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.10it/s, est. speed input: 1085.73 toks/s, output: 1178.20 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:15<00:00,  4.21it/s, est. speed input: 903.88 toks/s, output: 1225.49 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.46it/s, est. speed input: 963.08 toks/s, output: 1142.69 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.53it/s, est. speed input: 1194.50 toks/s, output: 1159.91 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.58it/s, est. speed input: 989.11 toks/s, output: 1098.24 toks/s] 
 86%|████████▌ | 89/104 [1:23:04<16:16, 65.08s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.60it/s, est. speed input: 1332.90 toks/s, output: 1082.36 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.68it/s, est. speed input: 1389.78 toks/s, output: 1076.93 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.95it/s, est. speed input: 1257.31 toks/s, output: 1185.78 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.48it/s, est. speed input: 1174.50 toks/s, output: 1192.31 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.61it/s, est. speed input: 1202.58 toks/s, output: 1187.76 toks/s]
 87%|████████▋ | 90/104 [1:23:57<14:21, 61.53s/it]



Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.42it/s, est. speed input: 1132.53 toks/s, output: 1216.44 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.39it/s, est. speed input: 1130.11 toks/s, output: 1124.21 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:13<00:00,  4.57it/s, est. speed input: 967.20 toks/s, output: 1092.19 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.46it/s, est. speed input: 1159.11 toks/s, output: 1176.86 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.06it/s, est. speed input: 1085.45 toks/s, output: 1188.55 toks/s]
 88%|████████▊ | 91/104 [1:24:59<13:22, 61.73s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.63it/s, est. speed input: 1287.25 toks/s, output: 1088.40 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.79it/s, est. speed input: 1352.75 toks/s, output: 1053.99 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.62it/s, est. speed input: 1363.01 toks/s, output: 1098.76 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.44it/s, est. speed input: 1368.96 toks/s, output: 1114.56 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.50it/s, est. speed input: 1386.33 toks/s, output: 1095.94 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  7.00it/s, est. speed input: 1394.03 toks/s, output: 1101.45 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.42it/s, est. speed input: 1332.81 toks/s, output: 1061.53 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.81it/s, est. speed input: 1234.32 toks/s, output: 1183.04 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.25it/s, est. speed input: 1365.15 toks/s, output: 1110.92 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.40it/s, est. speed input: 1415.31 toks/s, output: 1182.96 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.53it/s, est. speed input: 1273.46 toks/s, output: 1157.24 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.49it/s, est. speed input: 1533.58 toks/s, output: 1042.39 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.27it/s, est. speed input: 1544.89 toks/s, output: 1048.67 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.72it/s, est. speed input: 1459.53 toks/s, output: 1038.89 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.48it/s, est. speed input: 1417.42 toks/s, output: 1064.18 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 22.59it/s, est. speed input: 16849.90 toks/s, output: 493.74 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 23.30it/s, est. speed input: 17407.09 toks/s, output: 516.60 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 23.18it/s, est. speed input: 17319.84 toks/s, output: 511.47 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 23.28it/s, est. speed input: 17393.65 toks/s, output: 501.65 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:02<00:00, 22.98it/s, est. speed input: 17166.35 toks/s, output: 498.68 toks/s]
 91%|█████████▏| 95/104 [1:27:39<06:11, 41.33s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.01it/s, est. speed input: 1226.60 toks/s, output: 1128.42 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.11it/s, est. speed input: 1305.58 toks/s, output: 1148.35 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.26it/s, est. speed input: 1364.97 toks/s, output: 1143.03 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.16it/s, est. speed input: 1364.52 toks/s, output: 1123.51 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.87it/s, est. speed input: 1317.19 toks/s, output: 1169.53 toks/s]
 92%|█████████▏| 96/104 [1:28:33<05:59, 44.91s/it]



Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  4.94it/s, est. speed input: 1047.62 toks/s, output: 1171.16 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.68it/s, est. speed input: 1224.86 toks/s, output: 1110.39 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:14<00:00,  4.42it/s, est. speed input: 976.69 toks/s, output: 1183.85 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:12<00:00,  5.16it/s, est. speed input: 1149.29 toks/s, output: 1160.57 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.17it/s, est. speed input: 1375.31 toks/s, output: 1161.03 toks/s]
 93%|█████████▎| 97/104 [1:29:34<05:49, 49.95s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.09it/s, est. speed input: 1223.78 toks/s, output: 1232.72 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.45it/s, est. speed input: 1336.87 toks/s, output: 1160.28 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.50it/s, est. speed input: 1166.74 toks/s, output: 1171.20 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  7.02it/s, est. speed input: 1531.51 toks/s, output: 1061.39 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.58it/s, est. speed input: 1247.93 toks/s, output: 1189.47 toks/s]
 94%|█████████▍| 98/104 [1:30:28<05:05, 50.93s/it]



Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.92it/s, est. speed input: 1201.78 toks/s, output: 1200.85 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  5.96it/s, est. speed input: 1235.00 toks/s, output: 1173.43 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.26it/s, est. speed input: 1299.48 toks/s, output: 1190.67 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.61it/s, est. speed input: 1379.09 toks/s, output: 1099.51 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.62it/s, est. speed input: 1383.53 toks/s, output: 1056.60 toks/s]
 95%|█████████▌| 99/104 [1:31:19<04:15, 51.07s/it]



Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.44it/s, est. speed input: 1249.20 toks/s, output: 1082.98 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  8.24it/s, est. speed input: 1686.29 toks/s, output: 1127.28 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:07<00:00,  9.10it/s, est. speed input: 1918.13 toks/s, output: 1107.10 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.35it/s, est. speed input: 1358.69 toks/s, output: 1162.79 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.81it/s, est. speed input: 1464.30 toks/s, output: 1030.00 toks/s]
 96%|█████████▌| 100/104 [1:32:04<03:16, 49.15s/it]



Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.57it/s, est. speed input: 1102.08 toks/s, output: 1115.28 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:16<00:00,  3.89it/s, est. speed input: 1207.04 toks/s, output: 1143.07 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:17<00:00,  3.67it/s, est. speed input: 1141.68 toks/s, output: 1141.50 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:20<00:00,  3.18it/s, est. speed input: 990.53 toks/s, output: 1119.37 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:19<00:00,  3.22it/s, est. speed input: 1007.22 toks/s, output: 1088.79 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.22it/s, est. speed input: 1263.22 toks/s, output: 1126.80 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.80it/s, est. speed input: 1197.44 toks/s, output: 1194.99 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.72it/s, est. speed input: 1402.03 toks/s, output: 1100.20 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.43it/s, est. speed input: 1349.76 toks/s, output: 1066.87 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.39it/s, est. speed input: 1343.97 toks/s, output: 1110.30 toks/s]
 98%|█████████▊| 102/104 [1:34:27<01:57, 58.70s/it]



Processed prompts: 100%|██████████| 64/64 [00:08<00:00,  7.12it/s, est. speed input: 1460.44 toks/s, output: 1076.19 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:10<00:00,  6.23it/s, est. speed input: 1320.45 toks/s, output: 1167.92 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.49it/s, est. speed input: 1398.91 toks/s, output: 1048.83 toks/s]




Processed prompts: 100%|██████████| 64/64 [00:11<00:00,  5.72it/s, est. speed input: 1252.54 toks/s, output: 1257.46 toks/s]
Processed prompts: 100%|██████████| 64/64 [00:09<00:00,  6.60it/s, est. speed input: 1462.95 toks/s, output: 1131.23 toks/s]
Processed prompts: 100%|██████████| 43/43 [00:05<00:00,  7.18it/s, est. speed input: 1385.93 toks/s, output: 1113.88 toks/s]




Processed prompts: 100%|██████████| 43/43 [00:05<00:00,  8.12it/s, est. speed input: 1610.18 toks/s, output: 1048.63 toks/s]
Processed prompts: 100%|██████████| 43/43 [00:06<00:00,  6.77it/s, est. speed input: 1390.64 toks/s, output: 1098.71 toks/s]
Processed prompts: 100%|██████████| 43/43 [00:05<00:00,  7.17it/s, est. speed input: 1505.68 toks/s, output: 1082.06 toks/s]
Processed prompts: 100%|██████████| 43/43 [00:06<00:00,  7.13it/s, est. speed input: 1508.46 toks/s, output: 1184.66 toks/s]
100%|██████████| 104/104 [1:35:47<00:00, 55.26s/it]

Average BLEU score: 0.0789
Average tools used: 0.0561
Average calls per sample: 3.0968





## Model with SFT + RL

In [8]:
# from pretrained peft model
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest


def load_model(model_name_or_path, peft_model_id):
    # Load the base model
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map="auto", torch_dtype='auto')
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

    # Load the PEFT model
    peft_model = PeftModel.from_pretrained(model, peft_model_id)
    
    return peft_model, tokenizer

model, tokenizer = load_model("Qwen/Qwen2.5-0.5B-Instruct", 'models/sft_base_qwen')
vllm_lora_adapter = 'models/grpo_policy_model'
base_model_name = "Qwen/Qwen2.5-0.5B-Instruct"

inference_engine = LLM(
    model=base_model_name,
    enable_lora=True,
    max_lora_rank=64,
    max_loras=1,
    gpu_memory_utilization=0.2,
    # enable_prefix_caching=True,
    swap_space=6,
    scheduling_policy="fcfs",
    dtype=torch.bfloat16,
    max_model_len=768,
    # enable_sleep_mode=True,
    )

dataloader = DataLoader(dataset, batch_size=48, shuffle=True)

# Evaluate the model
avg_bleu, tools_used_avg, calls_per_sample_avg = evaluate_model(inference_engine, tokenizer, dataloader, actions_num=4, lora_request=LoRARequest('adapter', 1, vllm_lora_adapter), tools=TOOLS)
print(f"Average BLEU score: {avg_bleu:.4f}")
print(f"Average tools used: {tools_used_avg:.4f}")
print(f"Average calls per sample: {calls_per_sample_avg:.4f}")

[2025-05-09 19:02:54,546] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


INFO 05-09 19:03:00 __init__.py:207] Automatically detected platform cuda.
INFO 05-09 19:03:05 config.py:549] This model supports multiple tasks: {'classify', 'generate', 'reward', 'score', 'embed'}. Defaulting to 'generate'.
INFO 05-09 19:03:05 llm_engine.py:234] Initializing a V0 LLM engine (v0.7.3) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=Fal

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]
Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:00<00:00,  3.36it/s]
Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:00<00:00,  3.35it/s]


INFO 05-09 19:03:07 model_runner.py:1115] Loading model weights took 0.9254 GB
INFO 05-09 19:03:07 punica_selector.py:18] Using PunicaWrapperGPU.





INFO 05-09 19:03:09 worker.py:267] Memory profiling takes 2.08 seconds
INFO 05-09 19:03:09 worker.py:267] the current vLLM instance can use total_gpu_memory (11.99GiB) x gpu_memory_utilization (0.20) = 2.40GiB
INFO 05-09 19:03:09 worker.py:267] model weights take 0.93GiB; non_torch_memory takes 0.02GiB; PyTorch activation peak memory takes 1.39GiB; the rest of the memory reserved for KV Cache is 0.07GiB.
INFO 05-09 19:03:09 executor_base.py:111] # cuda blocks: 375, # CPU blocks: 32768
INFO 05-09 19:03:09 executor_base.py:116] Maximum concurrency for 768 tokens per request: 7.81x
INFO 05-09 19:03:10 model_runner.py:1434] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as nee

Capturing CUDA graph shapes: 100%|██████████| 35/35 [00:11<00:00,  3.03it/s]

INFO 05-09 19:03:21 model_runner.py:1562] Graph capturing finished in 12 secs, took 0.31 GiB
INFO 05-09 19:03:21 llm_engine.py:436] init engine (profile, create kv cache, warmup model) took 14.72 seconds



  0%|          | 0/139 [00:00<?, ?it/s]



  outputs, tools_used, how_many_tool_calls = generate_batch_completion(model, tokenizer, inputs, actions_num=actions_num, lora_request=lora_request, tools=tools)




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.15it/s, est. speed input: 1756.78 toks/s, output: 875.72 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.35it/s, est. speed input: 1603.90 toks/s, output: 1025.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.34it/s, est. speed input: 1601.79 toks/s, output: 1080.37 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.62it/s, est. speed input: 1654.13 toks/s, output: 1118.37 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 11.85it/s, est. speed input: 2277.06 toks/s, output: 751.60 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.35it/s, est. speed input: 1696.01 toks/s, output: 1170.53 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.17it/s, est. speed input: 1861.40 toks/s, output: 1015.32 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.34it/s, est. speed input: 1897.49 toks/s, output: 882.34 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.71it/s, est. speed input: 1565.27 toks/s, output: 996.28 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.53it/s, est. speed input: 1528.04 toks/s, output: 1096.63 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.93it/s, est. speed input: 1927.24 toks/s, output: 965.48 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:03<00:00, 14.25it/s, est. speed input: 2779.42 toks/s, output: 1037.75 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.08it/s, est. speed input: 1771.58 toks/s, output: 991.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.50it/s, est. speed input: 1657.91 toks/s, output: 1033.58 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.31it/s, est. speed input: 1814.46 toks/s, output: 1017.55 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.36it/s, est. speed input: 1623.81 toks/s, output: 1087.67 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.24it/s, est. speed input: 1588.31 toks/s, output: 1103.43 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.04it/s, est. speed input: 1224.77 toks/s, output: 1036.56 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:12<00:00,  3.90it/s, est. speed input: 1183.21 toks/s, output: 1096.90 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:12<00:00,  3.97it/s, est. speed input: 1202.64 toks/s, output: 1072.16 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.40it/s, est. speed input: 1966.51 toks/s, output: 903.48 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.99it/s, est. speed input: 1699.14 toks/s, output: 970.75 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.81it/s, est. speed input: 1854.02 toks/s, output: 966.45 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.86it/s, est. speed input: 1863.78 toks/s, output: 992.70 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.72it/s, est. speed input: 1836.68 toks/s, output: 911.05 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.84it/s, est. speed input: 1741.25 toks/s, output: 920.90 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.87it/s, est. speed input: 1551.53 toks/s, output: 1182.19 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.27it/s, est. speed input: 1433.30 toks/s, output: 1169.40 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.95it/s, est. speed input: 1959.54 toks/s, output: 929.20 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.72it/s, est. speed input: 1914.71 toks/s, output: 955.33 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.34it/s, est. speed input: 2027.46 toks/s, output: 960.28 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.02it/s, est. speed input: 1585.05 toks/s, output: 1140.55 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.66it/s, est. speed input: 1929.78 toks/s, output: 930.16 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.52it/s, est. speed input: 1702.60 toks/s, output: 1105.70 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.43it/s, est. speed input: 2083.70 toks/s, output: 910.90 toks/s]
  5%|▌         | 7/139 [03:32<1:04:43, 29.42s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.79it/s, est. speed input: 1740.74 toks/s, output: 1021.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.24it/s, est. speed input: 1649.98 toks/s, output: 1154.45 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.03it/s, est. speed input: 1808.75 toks/s, output: 1011.15 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.35it/s, est. speed input: 1872.95 toks/s, output: 1059.90 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.18it/s, est. speed input: 1638.05 toks/s, output: 1125.99 toks/s]
  6%|▌         | 8/139 [04:00<1:03:01, 28.86s/it]



Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.07it/s, est. speed input: 1491.11 toks/s, output: 1083.15 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.04it/s, est. speed input: 1696.26 toks/s, output: 1213.91 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.26it/s, est. speed input: 1532.39 toks/s, output: 1162.00 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.73it/s, est. speed input: 1421.01 toks/s, output: 1084.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.59it/s, est. speed input: 1178.82 toks/s, output: 1080.47 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.88it/s, est. speed input: 2110.52 toks/s, output: 1047.78 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.11it/s, est. speed input: 1993.27 toks/s, output: 926.69 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.64it/s, est. speed input: 1900.09 toks/s, output: 863.70 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.66it/s, est. speed input: 1707.99 toks/s, output: 968.07 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.16it/s, est. speed input: 1806.88 toks/s, output: 991.85 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.19it/s, est. speed input: 1035.67 toks/s, output: 1107.47 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.06it/s, est. speed input: 1249.53 toks/s, output: 1113.79 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.10it/s, est. speed input: 1258.75 toks/s, output: 1101.08 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.01it/s, est. speed input: 1237.30 toks/s, output: 1119.06 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:10<00:00,  4.44it/s, est. speed input: 1097.35 toks/s, output: 1165.29 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.96it/s, est. speed input: 1742.63 toks/s, output: 1122.46 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.31it/s, est. speed input: 1381.11 toks/s, output: 1050.68 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.48it/s, est. speed input: 1857.72 toks/s, output: 1070.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  6.87it/s, est. speed input: 1504.51 toks/s, output: 1052.67 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.80it/s, est. speed input: 1709.32 toks/s, output: 1056.78 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.32it/s, est. speed input: 1742.85 toks/s, output: 923.85 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.30it/s, est. speed input: 1982.09 toks/s, output: 964.34 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.02it/s, est. speed input: 1952.76 toks/s, output: 941.41 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.46it/s, est. speed input: 2037.77 toks/s, output: 916.61 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.62it/s, est. speed input: 2069.11 toks/s, output: 827.20 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.10it/s, est. speed input: 1685.61 toks/s, output: 1161.90 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.16it/s, est. speed input: 1715.07 toks/s, output: 1010.74 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.78it/s, est. speed input: 1844.67 toks/s, output: 1054.93 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.90it/s, est. speed input: 1870.32 toks/s, output: 1136.54 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.39it/s, est. speed input: 1343.25 toks/s, output: 1119.37 toks/s]
 10%|█         | 14/139 [07:17<1:04:45, 31.08s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.79it/s, est. speed input: 1775.47 toks/s, output: 1104.54 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.14it/s, est. speed input: 1853.50 toks/s, output: 1069.59 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.66it/s, est. speed input: 1756.42 toks/s, output: 1009.96 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.51it/s, est. speed input: 1320.15 toks/s, output: 1116.54 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.25it/s, est. speed input: 1673.72 toks/s, output: 1199.91 toks/s]
 11%|█         | 15/139 [07:47<1:03:15, 30.61s/it]



Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.00it/s, est. speed input: 2069.50 toks/s, output: 990.38 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.57it/s, est. speed input: 1152.86 toks/s, output: 984.26 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.06it/s, est. speed input: 1462.04 toks/s, output: 1122.58 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.39it/s, est. speed input: 1530.44 toks/s, output: 1119.33 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.84it/s, est. speed input: 1208.41 toks/s, output: 1077.06 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.06it/s, est. speed input: 1711.78 toks/s, output: 1014.57 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.73it/s, est. speed input: 1839.12 toks/s, output: 969.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.53it/s, est. speed input: 1611.88 toks/s, output: 1066.06 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.78it/s, est. speed input: 1848.71 toks/s, output: 943.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.38it/s, est. speed input: 1772.35 toks/s, output: 936.38 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.84it/s, est. speed input: 1776.74 toks/s, output: 1093.88 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.41it/s, est. speed input: 1707.71 toks/s, output: 1112.41 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.11it/s, est. speed input: 1665.46 toks/s, output: 1054.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.70it/s, est. speed input: 1786.70 toks/s, output: 975.88 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  6.93it/s, est. speed input: 1422.65 toks/s, output: 1133.21 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.07it/s, est. speed input: 1478.29 toks/s, output: 1114.46 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.79it/s, est. speed input: 1837.91 toks/s, output: 999.20 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.00it/s, est. speed input: 1462.16 toks/s, output: 1119.21 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.91it/s, est. speed input: 1234.66 toks/s, output: 1122.04 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.93it/s, est. speed input: 1657.48 toks/s, output: 1172.39 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:13<00:00,  3.56it/s, est. speed input: 1053.13 toks/s, output: 1142.74 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  4.95it/s, est. speed input: 1466.33 toks/s, output: 1111.41 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:10<00:00,  4.65it/s, est. speed input: 1377.90 toks/s, output: 1093.26 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:12<00:00,  3.91it/s, est. speed input: 1158.14 toks/s, output: 1083.48 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:12<00:00,  3.76it/s, est. speed input: 1112.71 toks/s, output: 1107.30 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.91it/s, est. speed input: 1738.04 toks/s, output: 962.97 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.79it/s, est. speed input: 1713.49 toks/s, output: 1023.33 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.84it/s, est. speed input: 1919.22 toks/s, output: 828.17 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.50it/s, est. speed input: 1658.02 toks/s, output: 1066.55 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.06it/s, est. speed input: 1571.54 toks/s, output: 1064.65 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.84it/s, est. speed input: 1638.16 toks/s, output: 1058.30 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.32it/s, est. speed input: 1119.97 toks/s, output: 1052.71 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.74it/s, est. speed input: 1419.72 toks/s, output: 1092.71 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.57it/s, est. speed input: 1593.64 toks/s, output: 1056.28 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.25it/s, est. speed input: 1737.94 toks/s, output: 1063.36 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.58it/s, est. speed input: 1673.48 toks/s, output: 1086.33 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.23it/s, est. speed input: 1630.71 toks/s, output: 1133.39 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.02it/s, est. speed input: 1985.61 toks/s, output: 896.85 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.41it/s, est. speed input: 1863.36 toks/s, output: 1074.83 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.63it/s, est. speed input: 1709.88 toks/s, output: 1007.33 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.20it/s, est. speed input: 1998.45 toks/s, output: 943.99 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.00it/s, est. speed input: 1811.01 toks/s, output: 991.08 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.56it/s, est. speed input: 1735.44 toks/s, output: 1018.25 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.62it/s, est. speed input: 1747.60 toks/s, output: 952.82 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.61it/s, est. speed input: 1746.13 toks/s, output: 1119.64 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.64it/s, est. speed input: 1496.85 toks/s, output: 1154.14 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.92it/s, est. speed input: 1760.49 toks/s, output: 999.14 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.69it/s, est. speed input: 1715.79 toks/s, output: 1055.61 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.01it/s, est. speed input: 1779.30 toks/s, output: 1029.16 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.01it/s, est. speed input: 1778.30 toks/s, output: 1054.11 toks/s]
 18%|█▊        | 25/139 [13:13<57:10, 30.09s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.64it/s, est. speed input: 1598.20 toks/s, output: 1181.91 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.00it/s, est. speed input: 1869.55 toks/s, output: 958.96 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.28it/s, est. speed input: 1735.02 toks/s, output: 1096.96 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.08it/s, est. speed input: 1697.18 toks/s, output: 1018.91 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.68it/s, est. speed input: 1622.39 toks/s, output: 1114.39 toks/s]
 19%|█▊        | 26/139 [13:39<54:34, 28.98s/it]



Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.78it/s, est. speed input: 1594.87 toks/s, output: 1099.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.63it/s, est. speed input: 1153.37 toks/s, output: 1027.95 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.17it/s, est. speed input: 1676.19 toks/s, output: 1039.44 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.31it/s, est. speed input: 2113.28 toks/s, output: 904.80 toks/s] 
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.33it/s, est. speed input: 1297.29 toks/s, output: 1105.46 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.93it/s, est. speed input: 1896.57 toks/s, output: 944.14 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.09it/s, est. speed input: 1756.83 toks/s, output: 1105.41 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.34it/s, est. speed input: 2016.16 toks/s, output: 920.04 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.12it/s, est. speed input: 1972.35 toks/s, output: 874.33 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:02<00:00, 16.85it/s, est. speed input: 3284.93 toks/s, output: 1098.02 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.15it/s, est. speed input: 1998.80 toks/s, output: 1007.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.27it/s, est. speed input: 1629.83 toks/s, output: 1115.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.23it/s, est. speed input: 1818.86 toks/s, output: 918.08 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.13it/s, est. speed input: 1798.93 toks/s, output: 1044.24 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.18it/s, est. speed input: 1808.86 toks/s, output: 1033.55 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:03<00:00, 13.38it/s, est. speed input: 2448.62 toks/s, output: 797.25 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 11.67it/s, est. speed input: 2172.07 toks/s, output: 847.92 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.27it/s, est. speed input: 1725.66 toks/s, output: 971.85 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.07it/s, est. speed input: 1874.74 toks/s, output: 953.00 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 11.85it/s, est. speed input: 2205.67 toks/s, output: 789.94 toks/s]
 22%|██▏       | 30/139 [15:22<46:37, 25.67s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.03it/s, est. speed input: 1797.81 toks/s, output: 1005.24 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.79it/s, est. speed input: 1781.34 toks/s, output: 1013.35 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.18it/s, est. speed input: 1861.05 toks/s, output: 935.30 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.83it/s, est. speed input: 1586.39 toks/s, output: 1175.60 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.20it/s, est. speed input: 1662.72 toks/s, output: 1169.27 toks/s]
 22%|██▏       | 31/139 [15:50<47:29, 26.39s/it]



Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.57it/s, est. speed input: 1425.91 toks/s, output: 1124.88 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  6.98it/s, est. speed input: 1515.62 toks/s, output: 1094.08 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.09it/s, est. speed input: 1537.79 toks/s, output: 1128.53 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.42it/s, est. speed input: 1176.43 toks/s, output: 1027.12 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.19it/s, est. speed input: 1560.31 toks/s, output: 1157.35 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.57it/s, est. speed input: 2040.51 toks/s, output: 839.20 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.77it/s, est. speed input: 1886.63 toks/s, output: 1043.71 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.15it/s, est. speed input: 1572.65 toks/s, output: 1127.37 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.43it/s, est. speed input: 1820.72 toks/s, output: 1019.04 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.56it/s, est. speed input: 1460.35 toks/s, output: 1149.64 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  4.96it/s, est. speed input: 1595.86 toks/s, output: 1046.35 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:10<00:00,  4.37it/s, est. speed input: 1405.76 toks/s, output: 1002.30 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.05it/s, est. speed input: 1302.92 toks/s, output: 1054.49 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.66it/s, est. speed input: 1823.06 toks/s, output: 1056.97 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.23it/s, est. speed input: 1361.67 toks/s, output: 1026.45 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.69it/s, est. speed input: 1765.12 toks/s, output: 1003.56 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.04it/s, est. speed input: 1834.54 toks/s, output: 1046.61 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.03it/s, est. speed input: 1629.57 toks/s, output: 1090.89 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.03it/s, est. speed input: 1630.55 toks/s, output: 1065.61 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.69it/s, est. speed input: 1358.85 toks/s, output: 1131.12 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.66it/s, est. speed input: 1746.60 toks/s, output: 1117.48 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.97it/s, est. speed input: 1362.02 toks/s, output: 1021.14 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.14it/s, est. speed input: 1172.44 toks/s, output: 1082.34 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.36it/s, est. speed input: 1450.37 toks/s, output: 1053.05 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.26it/s, est. speed input: 1198.48 toks/s, output: 1078.45 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.63it/s, est. speed input: 1725.11 toks/s, output: 1022.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.38it/s, est. speed input: 1675.36 toks/s, output: 1072.40 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.55it/s, est. speed input: 1911.00 toks/s, output: 961.67 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.96it/s, est. speed input: 1791.63 toks/s, output: 1043.25 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.14it/s, est. speed input: 1827.41 toks/s, output: 1005.07 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.10it/s, est. speed input: 1691.98 toks/s, output: 1039.44 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.58it/s, est. speed input: 1585.18 toks/s, output: 1112.57 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.50it/s, est. speed input: 1776.71 toks/s, output: 1016.93 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.52it/s, est. speed input: 1780.84 toks/s, output: 1097.04 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.99it/s, est. speed input: 1877.98 toks/s, output: 1108.40 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.53it/s, est. speed input: 1238.75 toks/s, output: 1028.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.58it/s, est. speed input: 1474.49 toks/s, output: 1022.48 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.55it/s, est. speed input: 1466.61 toks/s, output: 1047.98 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.58it/s, est. speed input: 1698.37 toks/s, output: 1083.12 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.38it/s, est. speed input: 1204.87 toks/s, output: 1061.21 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.34it/s, est. speed input: 1803.44 toks/s, output: 1004.89 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.04it/s, est. speed input: 1771.97 toks/s, output: 1056.62 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:02<00:00, 17.61it/s, est. speed input: 3451.51 toks/s, output: 1026.68 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.72it/s, est. speed input: 1904.48 toks/s, output: 1005.00 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.54it/s, est. speed input: 1868.76 toks/s, output: 1009.60 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.08it/s, est. speed input: 1952.03 toks/s, output: 935.91 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.28it/s, est. speed input: 1135.98 toks/s, output: 1082.92 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.44it/s, est. speed input: 1600.31 toks/s, output: 1032.14 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.11it/s, est. speed input: 1744.26 toks/s, output: 1172.47 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.69it/s, est. speed input: 1438.50 toks/s, output: 1058.52 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.50it/s, est. speed input: 1560.09 toks/s, output: 1104.12 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.86it/s, est. speed input: 1843.21 toks/s, output: 927.14 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.68it/s, est. speed input: 1180.66 toks/s, output: 1024.44 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.29it/s, est. speed input: 1308.52 toks/s, output: 1032.24 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.84it/s, est. speed input: 1631.70 toks/s, output: 1126.86 toks/s]
 30%|███       | 42/139 [22:02<52:23, 32.41s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.21it/s, est. speed input: 1666.56 toks/s, output: 1096.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.83it/s, est. speed input: 1397.70 toks/s, output: 1089.14 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.68it/s, est. speed input: 1981.66 toks/s, output: 823.32 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.42it/s, est. speed input: 1929.35 toks/s, output: 999.72 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.59it/s, est. speed input: 1758.51 toks/s, output: 1080.52 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.85it/s, est. speed input: 2072.17 toks/s, output: 870.64 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.61it/s, est. speed input: 1852.25 toks/s, output: 1028.36 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.48it/s, est. speed input: 2020.83 toks/s, output: 885.55 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.79it/s, est. speed input: 2079.85 toks/s, output: 888.92 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.96it/s, est. speed input: 1726.43 toks/s, output: 1004.79 toks/s]
 32%|███▏      | 44/139 [22:55<45:57, 29.03s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.30it/s, est. speed input: 1700.80 toks/s, output: 1084.95 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.68it/s, est. speed input: 1789.30 toks/s, output: 1003.83 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.56it/s, est. speed input: 1971.35 toks/s, output: 933.49 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.78it/s, est. speed input: 1810.03 toks/s, output: 1052.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.41it/s, est. speed input: 2146.85 toks/s, output: 828.11 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.73it/s, est. speed input: 1736.39 toks/s, output: 1023.80 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.79it/s, est. speed input: 1749.04 toks/s, output: 974.86 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.59it/s, est. speed input: 1709.72 toks/s, output: 953.30 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.36it/s, est. speed input: 1663.13 toks/s, output: 1057.74 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.06it/s, est. speed input: 1803.40 toks/s, output: 1096.73 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.39it/s, est. speed input: 1694.56 toks/s, output: 1100.34 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.98it/s, est. speed input: 1819.40 toks/s, output: 986.26 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.04it/s, est. speed input: 1830.99 toks/s, output: 1042.64 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.80it/s, est. speed input: 1782.63 toks/s, output: 1052.70 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.27it/s, est. speed input: 1877.04 toks/s, output: 1028.70 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.37it/s, est. speed input: 1408.82 toks/s, output: 1176.47 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.77it/s, est. speed input: 1865.22 toks/s, output: 1071.77 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.22it/s, est. speed input: 1951.48 toks/s, output: 906.35 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.32it/s, est. speed input: 1780.51 toks/s, output: 1027.36 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.75it/s, est. speed input: 1480.14 toks/s, output: 1152.89 toks/s]
 35%|███▍      | 48/139 [24:43<42:00, 27.69s/it]



Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.87it/s, est. speed input: 1605.93 toks/s, output: 1131.30 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.64it/s, est. speed input: 1559.60 toks/s, output: 1081.62 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.48it/s, est. speed input: 1730.83 toks/s, output: 1125.61 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.46it/s, est. speed input: 1521.39 toks/s, output: 1128.92 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.29it/s, est. speed input: 1894.29 toks/s, output: 1059.93 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.80it/s, est. speed input: 1414.51 toks/s, output: 1194.62 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.56it/s, est. speed input: 1780.18 toks/s, output: 1107.79 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.23it/s, est. speed input: 1504.90 toks/s, output: 1124.45 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.36it/s, est. speed input: 1322.62 toks/s, output: 1082.45 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.06it/s, est. speed input: 1884.15 toks/s, output: 1067.57 toks/s]
 36%|███▌      | 50/139 [25:45<43:41, 29.46s/it]



Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.89it/s, est. speed input: 1657.89 toks/s, output: 1165.29 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.82it/s, est. speed input: 1649.37 toks/s, output: 1109.94 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.03it/s, est. speed input: 1483.33 toks/s, output: 1107.59 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.53it/s, est. speed input: 1590.17 toks/s, output: 1127.56 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.25it/s, est. speed input: 1318.87 toks/s, output: 1050.67 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.25it/s, est. speed input: 1470.87 toks/s, output: 1192.97 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.17it/s, est. speed input: 1880.98 toks/s, output: 1030.00 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.27it/s, est. speed input: 1695.87 toks/s, output: 1100.03 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.79it/s, est. speed input: 2008.74 toks/s, output: 1045.07 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.19it/s, est. speed input: 1681.09 toks/s, output: 1078.66 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.07it/s, est. speed input: 1201.59 toks/s, output: 1138.38 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.73it/s, est. speed input: 1926.02 toks/s, output: 1052.78 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.17it/s, est. speed input: 2014.53 toks/s, output: 964.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.18it/s, est. speed input: 1818.31 toks/s, output: 977.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.83it/s, est. speed input: 1748.40 toks/s, output: 1055.40 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.09it/s, est. speed input: 1293.32 toks/s, output: 1131.44 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.15it/s, est. speed input: 1307.02 toks/s, output: 1127.78 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.39it/s, est. speed input: 1368.35 toks/s, output: 1051.07 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:10<00:00,  4.63it/s, est. speed input: 1175.40 toks/s, output: 1160.55 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:10<00:00,  4.75it/s, est. speed input: 1206.07 toks/s, output: 1138.31 toks/s]
 39%|███▉      | 54/139 [28:03<49:40, 35.07s/it]



Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.41it/s, est. speed input: 1377.79 toks/s, output: 1049.49 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.16it/s, est. speed input: 1323.81 toks/s, output: 1007.48 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.71it/s, est. speed input: 1657.01 toks/s, output: 1076.09 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.41it/s, est. speed input: 1377.92 toks/s, output: 1099.93 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.24it/s, est. speed input: 1773.03 toks/s, output: 1078.25 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.11it/s, est. speed input: 1598.85 toks/s, output: 1121.36 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.14it/s, est. speed input: 1823.37 toks/s, output: 1142.65 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.75it/s, est. speed input: 1746.47 toks/s, output: 1001.99 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.51it/s, est. speed input: 1698.46 toks/s, output: 1000.87 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.36it/s, est. speed input: 1668.43 toks/s, output: 997.47 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.40it/s, est. speed input: 1823.51 toks/s, output: 1079.77 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.01it/s, est. speed input: 1959.85 toks/s, output: 937.99 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.11it/s, est. speed input: 1978.80 toks/s, output: 894.82 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.19it/s, est. speed input: 1603.09 toks/s, output: 1144.55 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  6.87it/s, est. speed input: 1344.29 toks/s, output: 1140.80 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.16it/s, est. speed input: 1823.57 toks/s, output: 1015.07 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.80it/s, est. speed input: 2149.37 toks/s, output: 847.41 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.97it/s, est. speed input: 1784.75 toks/s, output: 971.59 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.67it/s, est. speed input: 1924.50 toks/s, output: 870.78 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.48it/s, est. speed input: 1488.79 toks/s, output: 1160.06 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.01it/s, est. speed input: 1233.49 toks/s, output: 1087.45 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.55it/s, est. speed input: 1612.22 toks/s, output: 1068.67 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:10<00:00,  4.78it/s, est. speed input: 1175.27 toks/s, output: 1168.60 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  4.84it/s, est. speed input: 1190.08 toks/s, output: 1144.32 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  4.97it/s, est. speed input: 1223.79 toks/s, output: 1148.44 toks/s]
 42%|████▏     | 59/139 [30:47<46:29, 34.87s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.29it/s, est. speed input: 1823.27 toks/s, output: 1088.95 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.74it/s, est. speed input: 1262.28 toks/s, output: 1036.72 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.75it/s, est. speed input: 1264.89 toks/s, output: 1022.93 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.13it/s, est. speed input: 1787.69 toks/s, output: 1135.76 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.85it/s, est. speed input: 1946.94 toks/s, output: 1122.07 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.44it/s, est. speed input: 1841.42 toks/s, output: 1045.63 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.26it/s, est. speed input: 1621.43 toks/s, output: 1035.81 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.94it/s, est. speed input: 1558.71 toks/s, output: 1099.97 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.10it/s, est. speed input: 1786.65 toks/s, output: 982.06 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.43it/s, est. speed input: 1262.16 toks/s, output: 1131.15 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.60it/s, est. speed input: 1693.39 toks/s, output: 1056.94 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.77it/s, est. speed input: 1568.44 toks/s, output: 1104.25 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.08it/s, est. speed input: 2040.96 toks/s, output: 983.09 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.09it/s, est. speed input: 1839.76 toks/s, output: 1152.35 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.97it/s, est. speed input: 1816.36 toks/s, output: 967.80 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.67it/s, est. speed input: 1313.72 toks/s, output: 1079.48 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.35it/s, est. speed input: 1644.86 toks/s, output: 1066.31 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.01it/s, est. speed input: 1775.28 toks/s, output: 1008.92 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.71it/s, est. speed input: 1716.11 toks/s, output: 1036.45 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.85it/s, est. speed input: 1742.96 toks/s, output: 964.56 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.32it/s, est. speed input: 1714.48 toks/s, output: 1149.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.79it/s, est. speed input: 1815.43 toks/s, output: 1044.61 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.77it/s, est. speed input: 1822.77 toks/s, output: 1086.10 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.20it/s, est. speed input: 1704.52 toks/s, output: 1041.26 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.73it/s, est. speed input: 2023.02 toks/s, output: 1043.64 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.74it/s, est. speed input: 1713.16 toks/s, output: 1125.72 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.72it/s, est. speed input: 1713.94 toks/s, output: 1159.16 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.57it/s, est. speed input: 1879.71 toks/s, output: 897.01 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.87it/s, est. speed input: 1742.36 toks/s, output: 1051.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.83it/s, est. speed input: 1735.23 toks/s, output: 1065.94 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.66it/s, est. speed input: 1826.43 toks/s, output: 1052.33 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.58it/s, est. speed input: 1833.56 toks/s, output: 966.58 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.11it/s, est. speed input: 1567.87 toks/s, output: 1131.52 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.69it/s, est. speed input: 1887.70 toks/s, output: 968.57 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.32it/s, est. speed input: 1621.74 toks/s, output: 1018.98 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.30it/s, est. speed input: 1669.08 toks/s, output: 1094.03 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.98it/s, est. speed input: 1638.57 toks/s, output: 1138.12 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.25it/s, est. speed input: 1693.14 toks/s, output: 1120.68 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.62it/s, est. speed input: 1563.84 toks/s, output: 1156.05 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.89it/s, est. speed input: 1824.04 toks/s, output: 976.63 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.32it/s, est. speed input: 1464.08 toks/s, output: 1161.35 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.37it/s, est. speed input: 1893.35 toks/s, output: 992.35 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.22it/s, est. speed input: 1661.15 toks/s, output: 1105.26 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.15it/s, est. speed input: 1850.42 toks/s, output: 1050.73 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.15it/s, est. speed input: 1849.34 toks/s, output: 1009.32 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.28it/s, est. speed input: 1704.81 toks/s, output: 1142.40 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.80it/s, est. speed input: 1825.47 toks/s, output: 1052.45 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.70it/s, est. speed input: 1390.74 toks/s, output: 1103.54 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.20it/s, est. speed input: 1908.80 toks/s, output: 991.40 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.90it/s, est. speed input: 1639.58 toks/s, output: 1099.75 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.76it/s, est. speed input: 1913.78 toks/s, output: 878.57 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.02it/s, est. speed input: 1768.69 toks/s, output: 1156.38 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.31it/s, est. speed input: 1629.29 toks/s, output: 1171.22 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.00it/s, est. speed input: 1960.58 toks/s, output: 995.92 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.15it/s, est. speed input: 1792.77 toks/s, output: 1112.86 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.08it/s, est. speed input: 1314.08 toks/s, output: 1059.58 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.93it/s, est. speed input: 1281.74 toks/s, output: 1066.75 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.20it/s, est. speed input: 1770.88 toks/s, output: 1149.16 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.65it/s, est. speed input: 1435.76 toks/s, output: 1077.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.45it/s, est. speed input: 1609.55 toks/s, output: 1073.66 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.67it/s, est. speed input: 1682.39 toks/s, output: 974.89 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.42it/s, est. speed input: 1648.80 toks/s, output: 955.60 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.52it/s, est. speed input: 1668.25 toks/s, output: 1113.82 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.58it/s, est. speed input: 1877.06 toks/s, output: 1008.62 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.46it/s, est. speed input: 1853.55 toks/s, output: 982.38 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.21it/s, est. speed input: 1560.48 toks/s, output: 1183.88 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.95it/s, est. speed input: 1891.69 toks/s, output: 930.70 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.84it/s, est. speed input: 1679.84 toks/s, output: 1064.45 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.01it/s, est. speed input: 1712.78 toks/s, output: 1122.50 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.96it/s, est. speed input: 1703.65 toks/s, output: 997.53 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.23it/s, est. speed input: 1453.16 toks/s, output: 1156.89 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.41it/s, est. speed input: 1891.79 toks/s, output: 1105.11 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.75it/s, est. speed input: 1357.25 toks/s, output: 1160.58 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.14it/s, est. speed input: 1838.14 toks/s, output: 1083.30 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.54it/s, est. speed input: 1916.69 toks/s, output: 1037.81 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.59it/s, est. speed input: 1760.60 toks/s, output: 1073.89 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.79it/s, est. speed input: 1601.84 toks/s, output: 1056.79 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.69it/s, est. speed input: 1385.94 toks/s, output: 1104.26 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.81it/s, est. speed input: 1632.19 toks/s, output: 1121.94 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.74it/s, est. speed input: 1829.78 toks/s, output: 1010.31 toks/s]
 54%|█████▍    | 75/139 [38:31<31:15, 29.31s/it]



Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 2632.99it/s, est. speed input: 5449237.63 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 2144.88it/s, est. speed input: 4413091.99 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 2312.64it/s, est. speed input: 4759477.43 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 1985.43it/s, est. speed input: 4084257.61 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 2341.85it/s, est. speed input: 4819982.32 toks/s, output: 0.00 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.79it/s, est. speed input: 1784.25 toks/s, output: 1034.77 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.50it/s, est. speed input: 1522.74 toks/s, output: 1129.08 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.76it/s, est. speed input: 1575.48 toks/s, output: 1142.80 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.80it/s, est. speed input: 1787.14 toks/s, output: 956.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.59it/s, est. speed input: 1540.06 toks/s, output: 1137.02 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.44it/s, est. speed input: 1860.58 toks/s, output: 1094.78 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.79it/s, est. speed input: 1535.14 toks/s, output: 1110.28 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.99it/s, est. speed input: 1772.33 toks/s, output: 1076.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.87it/s, est. speed input: 1551.31 toks/s, output: 1141.00 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.21it/s, est. speed input: 1813.86 toks/s, output: 1118.12 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.80it/s, est. speed input: 2051.17 toks/s, output: 860.72 toks/s] 




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.76it/s, est. speed input: 1679.46 toks/s, output: 1071.63 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.64it/s, est. speed input: 2045.52 toks/s, output: 820.38 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.63it/s, est. speed input: 1855.61 toks/s, output: 1066.10 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.12it/s, est. speed input: 1758.46 toks/s, output: 1019.41 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.20it/s, est. speed input: 1840.22 toks/s, output: 949.25 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.16it/s, est. speed input: 1633.01 toks/s, output: 1087.31 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.71it/s, est. speed input: 1541.80 toks/s, output: 1125.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.11it/s, est. speed input: 1823.42 toks/s, output: 970.97 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.69it/s, est. speed input: 1738.92 toks/s, output: 1013.82 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.77it/s, est. speed input: 1914.38 toks/s, output: 1049.16 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.08it/s, est. speed input: 1976.08 toks/s, output: 925.86 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.26it/s, est. speed input: 1814.87 toks/s, output: 1060.79 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.99it/s, est. speed input: 1957.63 toks/s, output: 842.31 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.59it/s, est. speed input: 1880.34 toks/s, output: 1009.52 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.51it/s, est. speed input: 1240.57 toks/s, output: 994.18 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.12it/s, est. speed input: 1376.43 toks/s, output: 1027.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  6.91it/s, est. speed input: 1554.09 toks/s, output: 1100.24 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.50it/s, est. speed input: 1462.82 toks/s, output: 1045.37 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  6.86it/s, est. speed input: 1543.89 toks/s, output: 1047.84 toks/s]
 59%|█████▉    | 82/139 [41:24<27:43, 29.19s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.18it/s, est. speed input: 1701.77 toks/s, output: 1090.37 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  6.96it/s, est. speed input: 1447.18 toks/s, output: 1160.03 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.10it/s, est. speed input: 1685.70 toks/s, output: 1035.83 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.08it/s, est. speed input: 1681.57 toks/s, output: 1144.29 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  6.95it/s, est. speed input: 1445.86 toks/s, output: 1123.06 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.55it/s, est. speed input: 1861.81 toks/s, output: 1057.01 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.13it/s, est. speed input: 1584.73 toks/s, output: 1164.68 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.02it/s, est. speed input: 1758.57 toks/s, output: 956.31 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:03<00:00, 14.27it/s, est. speed input: 2784.53 toks/s, output: 935.91 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.70it/s, est. speed input: 1891.61 toks/s, output: 995.32 toks/s]
 60%|██████    | 84/139 [42:20<25:57, 28.31s/it]



Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 2457.18it/s, est. speed input: 3735077.39 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 2388.41it/s, est. speed input: 3631525.49 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 2308.08it/s, est. speed input: 3509354.11 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 2496.05it/s, est. speed input: 3797810.34 toks/s, output: 0.00 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:00<00:00, 2250.79it/s, est. speed input: 3421047.16 toks/s, output: 0.00 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.60it/s, est. speed input: 1872.33 toks/s, output: 934.16 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.66it/s, est. speed input: 1716.69 toks/s, output: 1002.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.10it/s, est. speed input: 1605.23 toks/s, output: 1042.42 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.05it/s, est. speed input: 1992.82 toks/s, output: 1041.75 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.35it/s, est. speed input: 1456.66 toks/s, output: 1153.54 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.85it/s, est. speed input: 1648.38 toks/s, output: 1076.68 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.94it/s, est. speed input: 1876.83 toks/s, output: 952.75 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.11it/s, est. speed input: 1703.69 toks/s, output: 1096.91 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.50it/s, est. speed input: 1784.96 toks/s, output: 1059.47 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.67it/s, est. speed input: 1819.98 toks/s, output: 1083.14 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.15it/s, est. speed input: 1831.06 toks/s, output: 1061.44 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.11it/s, est. speed input: 2022.83 toks/s, output: 856.75 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.28it/s, est. speed input: 1856.44 toks/s, output: 1064.16 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.59it/s, est. speed input: 1518.63 toks/s, output: 1061.14 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.27it/s, est. speed input: 1654.18 toks/s, output: 1132.59 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.89it/s, est. speed input: 1761.16 toks/s, output: 1012.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.11it/s, est. speed input: 1606.80 toks/s, output: 1086.58 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.16it/s, est. speed input: 1812.95 toks/s, output: 1085.02 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.08it/s, est. speed input: 1797.35 toks/s, output: 990.39 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.45it/s, est. speed input: 1672.82 toks/s, output: 1068.74 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.15it/s, est. speed input: 1729.11 toks/s, output: 969.68 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:10<00:00,  4.71it/s, est. speed input: 1963.55 toks/s, output: 990.41 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.26it/s, est. speed input: 1775.89 toks/s, output: 1001.24 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.17it/s, est. speed input: 1737.78 toks/s, output: 980.19 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.14it/s, est. speed input: 1726.37 toks/s, output: 988.85 toks/s]
 65%|██████▍   | 90/139 [45:08<28:32, 34.96s/it]



Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.13it/s, est. speed input: 1490.41 toks/s, output: 1110.82 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.16it/s, est. speed input: 1706.33 toks/s, output: 1083.12 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.33it/s, est. speed input: 1741.13 toks/s, output: 1035.09 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.89it/s, est. speed input: 1649.87 toks/s, output: 1120.30 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.68it/s, est. speed input: 1187.95 toks/s, output: 974.56 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 11.24it/s, est. speed input: 2303.38 toks/s, output: 809.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.90it/s, est. speed input: 1845.23 toks/s, output: 1005.85 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.70it/s, est. speed input: 1825.66 toks/s, output: 985.21 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.17it/s, est. speed input: 1513.73 toks/s, output: 1083.91 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.74it/s, est. speed input: 1844.87 toks/s, output: 986.16 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.02it/s, est. speed input: 1731.72 toks/s, output: 1019.94 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.16it/s, est. speed input: 1567.09 toks/s, output: 1143.01 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.44it/s, est. speed input: 2005.04 toks/s, output: 900.48 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.46it/s, est. speed input: 1816.60 toks/s, output: 976.69 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.94it/s, est. speed input: 1909.44 toks/s, output: 992.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.18it/s, est. speed input: 1774.31 toks/s, output: 1073.51 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.45it/s, est. speed input: 1182.22 toks/s, output: 1034.78 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.70it/s, est. speed input: 1236.49 toks/s, output: 1038.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.53it/s, est. speed input: 1852.27 toks/s, output: 1032.12 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.85it/s, est. speed input: 1269.17 toks/s, output: 1091.64 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.92it/s, est. speed input: 1672.22 toks/s, output: 1130.83 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.02it/s, est. speed input: 1691.81 toks/s, output: 1142.57 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.85it/s, est. speed input: 1867.84 toks/s, output: 1105.62 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.56it/s, est. speed input: 1805.98 toks/s, output: 1016.39 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.83it/s, est. speed input: 1863.38 toks/s, output: 1052.57 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.95it/s, est. speed input: 1558.82 toks/s, output: 1130.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.31it/s, est. speed input: 1643.81 toks/s, output: 1040.09 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.46it/s, est. speed input: 1870.05 toks/s, output: 944.58 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.52it/s, est. speed input: 1883.00 toks/s, output: 964.82 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.48it/s, est. speed input: 1675.98 toks/s, output: 968.07 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.08it/s, est. speed input: 1118.14 toks/s, output: 1037.04 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  4.92it/s, est. speed input: 1347.87 toks/s, output: 1088.79 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:10<00:00,  4.46it/s, est. speed input: 1221.06 toks/s, output: 1117.35 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.00it/s, est. speed input: 1096.57 toks/s, output: 1026.95 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:10<00:00,  4.47it/s, est. speed input: 1225.55 toks/s, output: 1124.91 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.42it/s, est. speed input: 1959.18 toks/s, output: 947.02 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.79it/s, est. speed input: 1513.03 toks/s, output: 1174.78 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.98it/s, est. speed input: 1937.57 toks/s, output: 1028.85 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.68it/s, est. speed input: 1879.95 toks/s, output: 1034.15 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.46it/s, est. speed input: 1837.65 toks/s, output: 985.26 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.58it/s, est. speed input: 1887.24 toks/s, output: 998.50 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.50it/s, est. speed input: 1881.72 toks/s, output: 965.19 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.56it/s, est. speed input: 1710.57 toks/s, output: 1070.58 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.56it/s, est. speed input: 1910.98 toks/s, output: 955.69 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.89it/s, est. speed input: 1775.36 toks/s, output: 1056.70 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  6.90it/s, est. speed input: 1490.90 toks/s, output: 1098.19 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.27it/s, est. speed input: 1137.56 toks/s, output: 977.92 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.90it/s, est. speed input: 1707.39 toks/s, output: 1034.35 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.79it/s, est. speed input: 1251.30 toks/s, output: 1033.82 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.32it/s, est. speed input: 1364.50 toks/s, output: 1066.81 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.77it/s, est. speed input: 1577.34 toks/s, output: 1118.90 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.27it/s, est. speed input: 1882.57 toks/s, output: 995.19 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.15it/s, est. speed input: 1654.58 toks/s, output: 1169.11 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.95it/s, est. speed input: 1816.28 toks/s, output: 1162.20 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.33it/s, est. speed input: 2096.24 toks/s, output: 934.31 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.11it/s, est. speed input: 1458.70 toks/s, output: 1135.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.72it/s, est. speed input: 1787.80 toks/s, output: 1148.62 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.97it/s, est. speed input: 2044.11 toks/s, output: 1032.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.39it/s, est. speed input: 1514.25 toks/s, output: 1168.31 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.36it/s, est. speed input: 1713.60 toks/s, output: 1138.04 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.25it/s, est. speed input: 1767.71 toks/s, output: 1018.63 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.74it/s, est. speed input: 1749.95 toks/s, output: 1080.93 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.07it/s, est. speed input: 2024.63 toks/s, output: 1044.52 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.41it/s, est. speed input: 2092.61 toks/s, output: 878.55 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.77it/s, est. speed input: 1763.17 toks/s, output: 1046.49 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.73it/s, est. speed input: 1907.44 toks/s, output: 1034.61 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.03it/s, est. speed input: 1574.66 toks/s, output: 1069.85 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.85it/s, est. speed input: 1735.52 toks/s, output: 1056.65 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.79it/s, est. speed input: 1331.82 toks/s, output: 1070.49 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.73it/s, est. speed input: 1908.16 toks/s, output: 959.35 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.27it/s, est. speed input: 1304.50 toks/s, output: 1076.63 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.38it/s, est. speed input: 1386.35 toks/s, output: 1096.80 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.75it/s, est. speed input: 1684.81 toks/s, output: 1129.61 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.13it/s, est. speed input: 1549.97 toks/s, output: 1120.47 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.71it/s, est. speed input: 1893.64 toks/s, output: 1034.03 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.42it/s, est. speed input: 1607.81 toks/s, output: 1127.47 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.40it/s, est. speed input: 1604.75 toks/s, output: 1065.28 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.38it/s, est. speed input: 1601.23 toks/s, output: 1091.41 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.51it/s, est. speed input: 1625.12 toks/s, output: 1069.94 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.36it/s, est. speed input: 1597.50 toks/s, output: 1079.98 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.19it/s, est. speed input: 1115.67 toks/s, output: 1106.37 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.62it/s, est. speed input: 1423.62 toks/s, output: 1154.76 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.96it/s, est. speed input: 1281.98 toks/s, output: 1096.51 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.35it/s, est. speed input: 1796.31 toks/s, output: 1043.67 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.66it/s, est. speed input: 1647.11 toks/s, output: 1127.28 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.36it/s, est. speed input: 1698.42 toks/s, output: 1024.90 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.82it/s, est. speed input: 1789.75 toks/s, output: 1100.41 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.30it/s, est. speed input: 1483.08 toks/s, output: 1116.12 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.54it/s, est. speed input: 1935.79 toks/s, output: 1089.87 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.59it/s, est. speed input: 1744.10 toks/s, output: 1100.44 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.49it/s, est. speed input: 1952.01 toks/s, output: 904.07 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.12it/s, est. speed input: 1725.61 toks/s, output: 1019.33 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.76it/s, est. speed input: 1657.46 toks/s, output: 1105.34 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.08it/s, est. speed input: 1908.07 toks/s, output: 1034.69 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.78it/s, est. speed input: 1471.84 toks/s, output: 1053.97 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.60it/s, est. speed input: 1565.04 toks/s, output: 1154.94 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.60it/s, est. speed input: 1566.67 toks/s, output: 1095.62 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.35it/s, est. speed input: 1307.72 toks/s, output: 1105.11 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.44it/s, est. speed input: 1327.70 toks/s, output: 1142.67 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.61it/s, est. speed input: 1980.09 toks/s, output: 1055.52 toks/s]
 79%|███████▉  | 110/139 [55:29<14:43, 30.45s/it]



Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.94it/s, est. speed input: 1723.41 toks/s, output: 1095.17 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.88it/s, est. speed input: 1276.88 toks/s, output: 1037.46 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.58it/s, est. speed input: 1645.23 toks/s, output: 1137.88 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.53it/s, est. speed input: 1633.54 toks/s, output: 1162.42 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.22it/s, est. speed input: 1784.82 toks/s, output: 1160.41 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  4.98it/s, est. speed input: 1359.20 toks/s, output: 1151.03 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.21it/s, est. speed input: 1150.18 toks/s, output: 1086.10 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.14it/s, est. speed input: 1402.95 toks/s, output: 1113.99 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:12<00:00,  3.87it/s, est. speed input: 1057.54 toks/s, output: 1054.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:13<00:00,  3.69it/s, est. speed input: 1006.35 toks/s, output: 1124.31 toks/s]
 81%|████████  | 112/139 [56:58<17:21, 38.56s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.69it/s, est. speed input: 1764.05 toks/s, output: 1092.93 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.43it/s, est. speed input: 1915.18 toks/s, output: 1045.45 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.51it/s, est. speed input: 1930.49 toks/s, output: 878.27 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.84it/s, est. speed input: 1795.36 toks/s, output: 1102.38 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.06it/s, est. speed input: 1840.12 toks/s, output: 1037.52 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.98it/s, est. speed input: 1684.78 toks/s, output: 1153.46 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.18it/s, est. speed input: 1726.07 toks/s, output: 1026.47 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.80it/s, est. speed input: 1645.06 toks/s, output: 1135.19 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.86it/s, est. speed input: 1870.45 toks/s, output: 1037.17 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.05it/s, est. speed input: 1698.38 toks/s, output: 1056.79 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.25it/s, est. speed input: 2039.43 toks/s, output: 904.63 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.29it/s, est. speed input: 2048.00 toks/s, output: 862.98 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.23it/s, est. speed input: 2035.35 toks/s, output: 884.28 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.09it/s, est. speed input: 1808.97 toks/s, output: 995.01 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.64it/s, est. speed input: 1720.49 toks/s, output: 1102.86 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.60it/s, est. speed input: 1551.09 toks/s, output: 1112.94 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.25it/s, est. speed input: 1071.07 toks/s, output: 1110.11 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.94it/s, est. speed input: 1825.13 toks/s, output: 880.69 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.26it/s, est. speed input: 1889.52 toks/s, output: 1121.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.76it/s, est. speed input: 1786.23 toks/s, output: 1018.61 toks/s]
 83%|████████▎ | 116/139 [58:50<11:52, 30.99s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.44it/s, est. speed input: 1756.42 toks/s, output: 1018.25 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.23it/s, est. speed input: 1920.17 toks/s, output: 1045.66 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.23it/s, est. speed input: 1296.57 toks/s, output: 1086.83 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.62it/s, est. speed input: 1793.38 toks/s, output: 948.60 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.02it/s, est. speed input: 1669.08 toks/s, output: 1026.95 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.12it/s, est. speed input: 1587.92 toks/s, output: 1114.69 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.22it/s, est. speed input: 1609.83 toks/s, output: 1087.21 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:09<00:00,  5.26it/s, est. speed input: 1173.87 toks/s, output: 1044.14 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.41it/s, est. speed input: 1430.42 toks/s, output: 1070.54 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.66it/s, est. speed input: 1707.47 toks/s, output: 1088.06 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.47it/s, est. speed input: 1884.45 toks/s, output: 1022.91 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.94it/s, est. speed input: 1784.32 toks/s, output: 1067.13 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.43it/s, est. speed input: 1696.17 toks/s, output: 1108.65 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.35it/s, est. speed input: 1880.95 toks/s, output: 957.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.27it/s, est. speed input: 2066.17 toks/s, output: 854.32 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.56it/s, est. speed input: 1728.67 toks/s, output: 1097.18 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.66it/s, est. speed input: 1548.35 toks/s, output: 1173.07 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.20it/s, est. speed input: 1656.86 toks/s, output: 1137.21 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.74it/s, est. speed input: 1563.17 toks/s, output: 1144.97 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.91it/s, est. speed input: 1597.46 toks/s, output: 1105.34 toks/s]
 86%|████████▋ | 120/139 [1:00:53<09:36, 30.36s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.45it/s, est. speed input: 1765.45 toks/s, output: 1073.66 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.85it/s, est. speed input: 1850.04 toks/s, output: 991.78 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.84it/s, est. speed input: 1638.46 toks/s, output: 1130.19 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.18it/s, est. speed input: 1292.18 toks/s, output: 1085.06 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.83it/s, est. speed input: 2053.98 toks/s, output: 981.33 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.48it/s, est. speed input: 1628.81 toks/s, output: 1061.83 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.72it/s, est. speed input: 1673.61 toks/s, output: 1184.57 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.42it/s, est. speed input: 1425.02 toks/s, output: 1137.42 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.61it/s, est. speed input: 2036.40 toks/s, output: 907.93 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.99it/s, est. speed input: 2110.78 toks/s, output: 855.90 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.68it/s, est. speed input: 1876.99 toks/s, output: 1043.91 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.18it/s, est. speed input: 1987.08 toks/s, output: 886.96 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.74it/s, est. speed input: 1900.96 toks/s, output: 1027.48 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.47it/s, est. speed input: 2044.22 toks/s, output: 930.14 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.84it/s, est. speed input: 1726.77 toks/s, output: 1078.56 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.91it/s, est. speed input: 1574.76 toks/s, output: 1160.63 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.79it/s, est. speed input: 1550.50 toks/s, output: 1105.09 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.09it/s, est. speed input: 1609.99 toks/s, output: 1116.64 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.79it/s, est. speed input: 1750.66 toks/s, output: 1067.59 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.04it/s, est. speed input: 1600.99 toks/s, output: 1055.26 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.63it/s, est. speed input: 1683.24 toks/s, output: 1061.55 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.84it/s, est. speed input: 2114.70 toks/s, output: 934.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.32it/s, est. speed input: 1623.06 toks/s, output: 1037.47 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.95it/s, est. speed input: 1550.01 toks/s, output: 1135.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.45it/s, est. speed input: 2038.57 toks/s, output: 957.65 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:02<00:00, 22.82it/s, est. speed input: 17027.58 toks/s, output: 494.54 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:02<00:00, 22.95it/s, est. speed input: 17119.01 toks/s, output: 496.72 toks/s]
Pr



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.44it/s, est. speed input: 1714.63 toks/s, output: 1018.15 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.60it/s, est. speed input: 1557.62 toks/s, output: 1164.33 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.33it/s, est. speed input: 1297.35 toks/s, output: 1100.28 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.73it/s, est. speed input: 1789.77 toks/s, output: 1110.92 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.77it/s, est. speed input: 1592.62 toks/s, output: 1141.30 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.91it/s, est. speed input: 1205.97 toks/s, output: 1042.29 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.07it/s, est. speed input: 1442.53 toks/s, output: 1117.84 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.32it/s, est. speed input: 1697.48 toks/s, output: 1007.70 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.14it/s, est. speed input: 1661.05 toks/s, output: 1044.60 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.21it/s, est. speed input: 1675.89 toks/s, output: 1079.26 toks/s]
 92%|█████████▏| 128/139 [1:04:24<05:01, 27.37s/it]



Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.58it/s, est. speed input: 1606.62 toks/s, output: 1133.60 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.21it/s, est. speed input: 1542.89 toks/s, output: 1140.93 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.57it/s, est. speed input: 1404.74 toks/s, output: 1123.46 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.96it/s, est. speed input: 1274.50 toks/s, output: 1045.24 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.72it/s, est. speed input: 1651.92 toks/s, output: 1116.51 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.71it/s, est. speed input: 1503.54 toks/s, output: 1160.75 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.54it/s, est. speed input: 1666.10 toks/s, output: 1055.37 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.91it/s, est. speed input: 1542.40 toks/s, output: 1105.22 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.40it/s, est. speed input: 1637.90 toks/s, output: 982.04 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.34it/s, est. speed input: 1431.19 toks/s, output: 1101.67 toks/s]
 94%|█████████▎| 130/139 [1:05:29<04:27, 29.75s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.64it/s, est. speed input: 1737.13 toks/s, output: 1025.21 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.56it/s, est. speed input: 1720.65 toks/s, output: 1161.19 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.77it/s, est. speed input: 1762.74 toks/s, output: 1118.34 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.29it/s, est. speed input: 1665.32 toks/s, output: 1106.93 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.58it/s, est. speed input: 1723.85 toks/s, output: 1150.84 toks/s]
 94%|█████████▍| 131/139 [1:05:57<03:53, 29.25s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.43it/s, est. speed input: 1913.99 toks/s, output: 967.99 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.99it/s, est. speed input: 1640.15 toks/s, output: 1102.14 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.35it/s, est. speed input: 1714.27 toks/s, output: 1105.15 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.34it/s, est. speed input: 1711.23 toks/s, output: 1030.77 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.53it/s, est. speed input: 1751.39 toks/s, output: 1007.14 toks/s]
 95%|█████████▍| 132/139 [1:06:25<03:22, 28.95s/it]



Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.76it/s, est. speed input: 1835.04 toks/s, output: 940.29 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.61it/s, est. speed input: 1807.16 toks/s, output: 878.74 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 11.55it/s, est. speed input: 2172.84 toks/s, output: 867.06 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.66it/s, est. speed input: 1439.81 toks/s, output: 1118.62 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:04<00:00, 10.10it/s, est. speed input: 1900.10 toks/s, output: 964.37 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.26it/s, est. speed input: 1898.44 toks/s, output: 971.40 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.07it/s, est. speed input: 1654.34 toks/s, output: 1066.74 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.42it/s, est. speed input: 1725.63 toks/s, output: 1076.59 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.06it/s, est. speed input: 1243.17 toks/s, output: 1074.88 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.49it/s, est. speed input: 1534.85 toks/s, output: 1153.78 toks/s]
 96%|█████████▋| 134/139 [1:07:22<02:24, 28.82s/it]



Processed prompts: 100%|██████████| 48/48 [00:13<00:00,  3.65it/s, est. speed input: 1127.83 toks/s, output: 1138.32 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.15it/s, est. speed input: 1281.29 toks/s, output: 1058.32 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:08<00:00,  5.59it/s, est. speed input: 1727.67 toks/s, output: 1076.30 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:12<00:00,  3.75it/s, est. speed input: 1160.30 toks/s, output: 1104.91 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:11<00:00,  4.10it/s, est. speed input: 1267.41 toks/s, output: 1026.44 toks/s]
 97%|█████████▋| 135/139 [1:08:20<02:30, 37.53s/it]



Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.48it/s, est. speed input: 1518.38 toks/s, output: 1130.05 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.35it/s, est. speed input: 1898.07 toks/s, output: 1035.52 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.23it/s, est. speed input: 1671.00 toks/s, output: 1122.23 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.47it/s, est. speed input: 1923.48 toks/s, output: 991.94 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.80it/s, est. speed input: 1584.08 toks/s, output: 1211.96 toks/s]
 98%|█████████▊| 136/139 [1:08:48<01:44, 34.86s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.05it/s, est. speed input: 1651.19 toks/s, output: 1110.69 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.05it/s, est. speed input: 1873.44 toks/s, output: 1035.31 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.41it/s, est. speed input: 1550.32 toks/s, output: 1162.35 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.63it/s, est. speed input: 1804.49 toks/s, output: 1083.52 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  8.31it/s, est. speed input: 1736.80 toks/s, output: 1096.18 toks/s]
 99%|█████████▊| 137/139 [1:09:17<01:06, 33.14s/it]



Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.57it/s, est. speed input: 1846.73 toks/s, output: 1017.65 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:07<00:00,  6.81it/s, est. speed input: 1328.98 toks/s, output: 1107.53 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:06<00:00,  7.15it/s, est. speed input: 1393.87 toks/s, output: 1133.32 toks/s]
Processed prompts: 100%|██████████| 48/48 [00:04<00:00,  9.69it/s, est. speed input: 1890.75 toks/s, output: 953.86 toks/s]




Processed prompts: 100%|██████████| 48/48 [00:05<00:00,  9.40it/s, est. speed input: 1832.74 toks/s, output: 964.92 toks/s]
Processed prompts: 100%|██████████| 11/11 [00:02<00:00,  4.15it/s, est. speed input: 731.48 toks/s, output: 377.45 toks/s]
Processed prompts: 100%|██████████| 11/11 [00:02<00:00,  4.57it/s, est. speed input: 840.12 toks/s, output: 469.04 toks/s] 
Processed prompts: 100%|██████████| 11/11 [00:02<00:00,  3.89it/s, est. speed input: 727.06 toks/s, output: 449.05 toks/s]
Processed prompts: 100%|██████████| 11/11 [00:01<00:00,  7.75it/s, est. speed input: 1446.71 toks/s, output: 522.17 toks/s]
Processed prompts: 100%|██████████| 11/11 [00:00<00:00, 20.11it/s, est. speed input: 3754.08 toks/s, output: 661.93 toks/s]
100%|██████████| 139/139 [1:09:56<00:00, 30.19s/it]

Average BLEU score: 0.2032
Average tools used: 0.0104
Average calls per sample: 1.2754



