In [1]:
# Install dependencies
!pip install transformers datasets torch nltk rouge_score psutil gpustat

# Set environment variable to reduce memory fragmentation
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Hugging Face login
from huggingface_hub import login
login("HUGGING FACE TOKEN")  # Replace with your actual Hugging Face token

import time
import torch
import psutil
import gpustat
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
import numpy as np
from tqdm import tqdm

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B")
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B",
    device_map="auto",  # Use "0" for single GPU if OOM persists
    torch_dtype=torch.float16
)
model.eval()

# Load ARC-Challenge dataset (first 500 samples from test split)
dataset = load_dataset("allenai/ai2_arc", "ARC-Challenge", split="test[:500]")

# Initialize metrics
latencies, tokens_per_sec, perplexities, bleus, rouge1s, rougeLs, memories, f1s = [], [], [], [], [], [], [], []
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

# Evaluation loop
for sample in tqdm(dataset, desc="Evaluating"):
    question = sample["question"]
    choices = sample["choices"]["text"]
    answer_key = sample["answerKey"]  # Typically A, B, C, D, or a number
    # Map answerKey to the corresponding choice text
    answer_idx = ord(answer_key) - ord('A') if answer_key in 'ABCD' else int(answer_key) - 1
    reference = choices[answer_idx].strip()

    # Create prompt for multiple-choice question
    choices_text = "\n".join([f"{chr(65+i)}. {choice}" for i, choice in enumerate(choices)])
    prompt = f"Answer the following question by selecting the correct choice (A, B, C, D, or number):\n{question}\n{choices_text}\nProvide the final answer as the choice letter (A, B, C, D) or number."

    # Tokenize input
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    input_length = inputs.input_ids.size(1)

    # Clear GPU memory
    torch.cuda.empty_cache()

    # Measure latency and generate
    start_time = time.time()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=20,  # Reduced to minimize memory
            do_sample=False
        )
    latency = time.time() - start_time

    # Decode output
    generated = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True).strip()
    output_length = outputs[0][input_length:].size(0)

    # Latency and tokens per second
    latencies.append(latency)
    tokens_per_sec.append(output_length / latency if latency > 0 else 0)

    # Perplexity (forward pass, skip for very short sequences)
    if output_length > 2:
        with torch.no_grad():
            input_ids = outputs[:, input_length:].to("cuda")
            labels = input_ids.clone().to("cuda")
            outputs_forward = model(input_ids, labels=labels)
            perplexity = torch.exp(outputs_forward.loss).item()
        perplexities.append(perplexity)
    else:
        perplexities.append(float('inf'))

    # BLEU and ROUGE
    bleu = sentence_bleu([reference.split()], generated.split())
    bleus.append(bleu)
    rouge_scores = scorer.score(reference, generated)
    rouge1s.append(rouge_scores['rouge1'].fmeasure)
    rougeLs.append(rouge_scores['rougeL'].fmeasure)

    # Memory usage (GPU)
    gpu_stats = gpustat.new_query().gpus[0]
    memory_used = gpu_stats.memory_used / 1024  # Convert MB to GB
    memories.append(memory_used)

    # F1 Score (binary: correct or not)
    # Compare generated answer (e.g., 'A') with reference choice text
    is_correct = generated == answer_key
    f1 = 1.0 if is_correct else 0.0
    f1s.append(f1)

    # Log memory usage for debugging
    print(f"Sample {len(latencies)}: GPU Memory Used: {memory_used:.3f} GB")

# Compute averages
avg_latency = np.mean(latencies)
avg_tps = np.mean(tokens_per_sec)
avg_perplexity = np.mean([p for p in perplexities if p != float('inf')]) if perplexities else float('inf')
avg_bleu = np.mean(bleus)
avg_rouge1 = np.mean(rouge1s)
avg_rougeL = np.mean(rougeLs)
avg_memory = np.mean(memories)
avg_f1 = np.mean(f1s)
avg_knowledge_retention = avg_f1
avg_flop_reduction = 0.0
avg_retrieval_latency = 0.0
avg_memory_reduction = 0.0
avg_query_time = avg_latency
avg_accuracy_drop = 0.0
avg_compression_ratio = 1.0

# Print results
print(f"Avg latency: {avg_latency:.3f} sec")
print(f"Tokens per sec: {avg_tps:.2f}")
print(f"Avg perplexity: {avg_perplexity:.2f}")
print(f"BLEU Score: {avg_bleu:.3f}")
print(f"ROUGE-1 Score: {avg_rouge1:.3f}")
print(f"ROUGE-L Score: {avg_rougeL:.3f}")
print(f"Memory usage (GB): {avg_memory:.3f}")
print(f"FLOP Reduction (%): {avg_flop_reduction:.2f}")
print(f"Retrieval Latency (sec): {avg_retrieval_latency:.3f}")
print(f"F1 Score: {avg_f1:.3f}")
print(f"Knowledge Retention: {avg_knowledge_retention:.3f}")
print(f"Memory Reduction (%): {avg_memory_reduction:.2f}")
print(f"Query Processing Time (sec): {avg_query_time:.3f}")
print(f"Accuracy Drop: {avg_accuracy_drop:.3f}")
print(f"Compression Ratio: {avg_compression_ratio:.2f}")

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gpustat
  Downloading gpustat-1.1.1.tar.gz (98 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.1/98.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvi

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/844 [00:00<?, ?B/s]

2025-04-21 18:58:12.360286: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745261892.563956      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745261892.619569      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/9.00k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


train-00000-of-00001.parquet:   0%|          | 0.00/190k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


test-00000-of-00001.parquet:   0%|          | 0.00/204k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


validation-00000-of-00001.parquet:   0%|          | 0.00/55.7k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1119 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1172 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/299 [00:00<?, ? examples/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Evaluating:   0%|          | 1/500 [00:00<08:01,  1.04it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 1: GPU Memory Used: 3.146 GB


Evaluating:   0%|          | 2/500 [00:01<04:42,  1.76it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 2: GPU Memory Used: 3.148 GB


Evaluating:   1%|          | 3/500 [00:01<03:39,  2.27it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 3: GPU Memory Used: 3.146 GB


Evaluating:   1%|          | 4/500 [00:01<03:09,  2.62it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 4: GPU Memory Used: 3.148 GB


Evaluating:   1%|          | 5/500 [00:02<02:55,  2.82it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 5: GPU Memory Used: 3.150 GB


Evaluating:   1%|          | 6/500 [00:02<02:47,  2.95it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 6: GPU Memory Used: 3.150 GB


Evaluating:   1%|▏         | 7/500 [00:02<02:42,  3.03it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 7: GPU Memory Used: 3.150 GB


Evaluating:   2%|▏         | 8/500 [00:03<02:36,  3.15it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 8: GPU Memory Used: 3.146 GB


Evaluating:   2%|▏         | 9/500 [00:03<02:34,  3.17it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 9: GPU Memory Used: 3.148 GB


Evaluating:   2%|▏         | 10/500 [00:03<02:30,  3.27it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 10: GPU Memory Used: 3.145 GB


Evaluating:   2%|▏         | 11/500 [00:03<02:27,  3.32it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 11: GPU Memory Used: 3.146 GB


Evaluating:   2%|▏         | 12/500 [00:04<02:24,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 12: GPU Memory Used: 3.145 GB


Evaluating:   3%|▎         | 13/500 [00:04<02:22,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 13: GPU Memory Used: 3.146 GB


Evaluating:   3%|▎         | 14/500 [00:04<02:22,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 14: GPU Memory Used: 3.146 GB


Evaluating:   3%|▎         | 15/500 [00:05<02:21,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 15: GPU Memory Used: 3.143 GB


Evaluating:   3%|▎         | 16/500 [00:05<02:20,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 16: GPU Memory Used: 3.145 GB


Evaluating:   3%|▎         | 17/500 [00:05<02:22,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 17: GPU Memory Used: 3.148 GB


Evaluating:   4%|▎         | 18/500 [00:05<02:21,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 18: GPU Memory Used: 3.148 GB


Evaluating:   4%|▍         | 19/500 [00:06<02:23,  3.34it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 19: GPU Memory Used: 3.150 GB


Evaluating:   4%|▍         | 20/500 [00:06<02:22,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 20: GPU Memory Used: 3.146 GB


Evaluating:   4%|▍         | 21/500 [00:06<02:20,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 21: GPU Memory Used: 3.146 GB


Evaluating:   4%|▍         | 22/500 [00:07<02:22,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 22: GPU Memory Used: 3.152 GB


Evaluating:   5%|▍         | 23/500 [00:07<02:20,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 23: GPU Memory Used: 3.145 GB


Evaluating:   5%|▍         | 24/500 [00:07<02:22,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 24: GPU Memory Used: 3.148 GB


Evaluating:   5%|▌         | 25/500 [00:08<02:19,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 25: GPU Memory Used: 3.143 GB


Evaluating:   5%|▌         | 26/500 [00:08<02:18,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 26: GPU Memory Used: 3.146 GB


Evaluating:   5%|▌         | 27/500 [00:08<02:17,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 27: GPU Memory Used: 3.148 GB


Evaluating:   6%|▌         | 28/500 [00:08<02:19,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 28: GPU Memory Used: 3.152 GB


Evaluating:   6%|▌         | 29/500 [00:09<02:18,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 29: GPU Memory Used: 3.146 GB


Evaluating:   6%|▌         | 30/500 [00:09<02:16,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 30: GPU Memory Used: 3.145 GB


Evaluating:   6%|▌         | 31/500 [00:09<02:16,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 31: GPU Memory Used: 3.146 GB


Evaluating:   6%|▋         | 32/500 [00:10<02:15,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 32: GPU Memory Used: 3.146 GB


Evaluating:   7%|▋         | 33/500 [00:10<02:14,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 33: GPU Memory Used: 3.145 GB


Evaluating:   7%|▋         | 34/500 [00:10<02:13,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 34: GPU Memory Used: 3.145 GB


Evaluating:   7%|▋         | 35/500 [00:10<02:12,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 35: GPU Memory Used: 3.143 GB


Evaluating:   7%|▋         | 36/500 [00:11<02:13,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 36: GPU Memory Used: 3.148 GB


Evaluating:   7%|▋         | 37/500 [00:11<02:13,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 37: GPU Memory Used: 3.148 GB


Evaluating:   8%|▊         | 38/500 [00:11<02:12,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 38: GPU Memory Used: 3.146 GB


Evaluating:   8%|▊         | 39/500 [00:12<02:12,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 39: GPU Memory Used: 3.146 GB


Evaluating:   8%|▊         | 40/500 [00:12<02:12,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 40: GPU Memory Used: 3.148 GB


Evaluating:   8%|▊         | 41/500 [00:12<02:12,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 41: GPU Memory Used: 3.145 GB


Evaluating:   8%|▊         | 42/500 [00:12<02:11,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 42: GPU Memory Used: 3.145 GB


Evaluating:   9%|▊         | 43/500 [00:13<02:11,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 43: GPU Memory Used: 3.145 GB


Evaluating:   9%|▉         | 44/500 [00:13<02:11,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 44: GPU Memory Used: 3.146 GB


Evaluating:   9%|▉         | 45/500 [00:13<02:10,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 45: GPU Memory Used: 3.146 GB


Evaluating:   9%|▉         | 46/500 [00:14<02:09,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 46: GPU Memory Used: 3.143 GB


Evaluating:   9%|▉         | 47/500 [00:14<02:09,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 47: GPU Memory Used: 3.143 GB


Evaluating:  10%|▉         | 48/500 [00:14<02:08,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 48: GPU Memory Used: 3.145 GB


Evaluating:  10%|▉         | 49/500 [00:14<02:08,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 49: GPU Memory Used: 3.145 GB


Evaluating:  10%|█         | 50/500 [00:15<02:08,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 50: GPU Memory Used: 3.145 GB


Evaluating:  10%|█         | 51/500 [00:15<02:11,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 51: GPU Memory Used: 3.150 GB


Evaluating:  10%|█         | 52/500 [00:15<02:10,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 52: GPU Memory Used: 3.146 GB


Evaluating:  11%|█         | 53/500 [00:16<02:09,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 53: GPU Memory Used: 3.145 GB


Evaluating:  11%|█         | 54/500 [00:16<02:08,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 54: GPU Memory Used: 3.145 GB


Evaluating:  11%|█         | 55/500 [00:16<02:08,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 55: GPU Memory Used: 3.146 GB


Evaluating:  11%|█         | 56/500 [00:17<02:10,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 56: GPU Memory Used: 3.152 GB


Evaluating:  11%|█▏        | 57/500 [00:17<02:10,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 57: GPU Memory Used: 3.146 GB


Evaluating:  12%|█▏        | 58/500 [00:17<02:09,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 58: GPU Memory Used: 3.148 GB


Evaluating:  12%|█▏        | 59/500 [00:17<02:07,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 59: GPU Memory Used: 3.143 GB


Evaluating:  12%|█▏        | 60/500 [00:18<02:10,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 60: GPU Memory Used: 3.152 GB


Evaluating:  12%|█▏        | 61/500 [00:18<02:08,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 61: GPU Memory Used: 3.145 GB


Evaluating:  12%|█▏        | 62/500 [00:18<02:07,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 62: GPU Memory Used: 3.145 GB


Evaluating:  13%|█▎        | 63/500 [00:19<02:06,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 63: GPU Memory Used: 3.145 GB


Evaluating:  13%|█▎        | 64/500 [00:19<02:05,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 64: GPU Memory Used: 3.143 GB


Evaluating:  13%|█▎        | 65/500 [00:19<02:07,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 65: GPU Memory Used: 3.150 GB


Evaluating:  13%|█▎        | 66/500 [00:19<02:06,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 66: GPU Memory Used: 3.145 GB


Evaluating:  13%|█▎        | 67/500 [00:20<02:05,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 67: GPU Memory Used: 3.146 GB


Evaluating:  14%|█▎        | 68/500 [00:20<02:05,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 68: GPU Memory Used: 3.146 GB


Evaluating:  14%|█▍        | 69/500 [00:20<02:04,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 69: GPU Memory Used: 3.145 GB


Evaluating:  14%|█▍        | 70/500 [00:21<02:03,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 70: GPU Memory Used: 3.145 GB


Evaluating:  14%|█▍        | 71/500 [00:21<02:03,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 71: GPU Memory Used: 3.146 GB


Evaluating:  14%|█▍        | 72/500 [00:21<02:02,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 72: GPU Memory Used: 3.145 GB


Evaluating:  15%|█▍        | 73/500 [00:21<02:02,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 73: GPU Memory Used: 3.148 GB


Evaluating:  15%|█▍        | 74/500 [00:22<02:01,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 74: GPU Memory Used: 3.145 GB


Evaluating:  15%|█▌        | 75/500 [00:22<02:01,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 75: GPU Memory Used: 3.145 GB


Evaluating:  15%|█▌        | 76/500 [00:22<02:01,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 76: GPU Memory Used: 3.145 GB


Evaluating:  15%|█▌        | 77/500 [00:23<02:03,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 77: GPU Memory Used: 3.148 GB


Evaluating:  16%|█▌        | 78/500 [00:23<02:03,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 78: GPU Memory Used: 3.148 GB


Evaluating:  16%|█▌        | 79/500 [00:23<02:02,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 79: GPU Memory Used: 3.146 GB


Evaluating:  16%|█▌        | 80/500 [00:23<02:01,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 80: GPU Memory Used: 3.145 GB


Evaluating:  16%|█▌        | 81/500 [00:24<02:01,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 81: GPU Memory Used: 3.145 GB


Evaluating:  16%|█▋        | 82/500 [00:24<02:00,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 82: GPU Memory Used: 3.145 GB


Evaluating:  17%|█▋        | 83/500 [00:24<01:59,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 83: GPU Memory Used: 3.145 GB


Evaluating:  17%|█▋        | 84/500 [00:25<01:59,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 84: GPU Memory Used: 3.143 GB


Evaluating:  17%|█▋        | 85/500 [00:25<02:01,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 85: GPU Memory Used: 3.150 GB


Evaluating:  17%|█▋        | 86/500 [00:25<02:00,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 86: GPU Memory Used: 3.145 GB


Evaluating:  17%|█▋        | 87/500 [00:25<02:02,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 87: GPU Memory Used: 3.152 GB


Evaluating:  18%|█▊        | 88/500 [00:26<02:03,  3.33it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 88: GPU Memory Used: 3.148 GB


Evaluating:  18%|█▊        | 89/500 [00:26<02:02,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 89: GPU Memory Used: 3.148 GB


Evaluating:  18%|█▊        | 90/500 [00:26<02:00,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 90: GPU Memory Used: 3.145 GB


Evaluating:  18%|█▊        | 91/500 [00:27<01:59,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 91: GPU Memory Used: 3.145 GB


Evaluating:  18%|█▊        | 92/500 [00:27<01:58,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 92: GPU Memory Used: 3.146 GB


Evaluating:  19%|█▊        | 93/500 [00:27<01:57,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 93: GPU Memory Used: 3.146 GB


Evaluating:  19%|█▉        | 94/500 [00:28<02:00,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 94: GPU Memory Used: 3.150 GB


Evaluating:  19%|█▉        | 95/500 [00:28<01:58,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 95: GPU Memory Used: 3.146 GB


Evaluating:  19%|█▉        | 96/500 [00:28<01:57,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 96: GPU Memory Used: 3.145 GB


Evaluating:  19%|█▉        | 97/500 [00:28<01:57,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 97: GPU Memory Used: 3.146 GB


Evaluating:  20%|█▉        | 98/500 [00:29<01:56,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 98: GPU Memory Used: 3.145 GB


Evaluating:  20%|█▉        | 99/500 [00:29<01:55,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 99: GPU Memory Used: 3.146 GB


Evaluating:  20%|██        | 100/500 [00:29<01:55,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 100: GPU Memory Used: 3.146 GB


Evaluating:  20%|██        | 101/500 [00:30<01:55,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 101: GPU Memory Used: 3.148 GB


Evaluating:  20%|██        | 102/500 [00:30<01:54,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 102: GPU Memory Used: 3.146 GB


Evaluating:  21%|██        | 103/500 [00:30<01:53,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 103: GPU Memory Used: 3.143 GB


Evaluating:  21%|██        | 104/500 [00:30<01:53,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 104: GPU Memory Used: 3.145 GB


Evaluating:  21%|██        | 105/500 [00:31<01:53,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 105: GPU Memory Used: 3.145 GB


Evaluating:  21%|██        | 106/500 [00:31<01:54,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 106: GPU Memory Used: 3.146 GB


Evaluating:  21%|██▏       | 107/500 [00:31<01:53,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 107: GPU Memory Used: 3.145 GB


Evaluating:  22%|██▏       | 108/500 [00:32<01:53,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 108: GPU Memory Used: 3.148 GB


Evaluating:  22%|██▏       | 109/500 [00:32<01:53,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 109: GPU Memory Used: 3.146 GB


Evaluating:  22%|██▏       | 110/500 [00:32<01:52,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 110: GPU Memory Used: 3.143 GB


Evaluating:  22%|██▏       | 111/500 [00:32<01:54,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 111: GPU Memory Used: 3.150 GB


Evaluating:  22%|██▏       | 112/500 [00:33<01:53,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 112: GPU Memory Used: 3.145 GB


Evaluating:  23%|██▎       | 113/500 [00:33<01:52,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 113: GPU Memory Used: 3.146 GB


Evaluating:  23%|██▎       | 114/500 [00:33<01:51,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 114: GPU Memory Used: 3.145 GB


Evaluating:  23%|██▎       | 115/500 [00:34<01:50,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 115: GPU Memory Used: 3.145 GB


Evaluating:  23%|██▎       | 116/500 [00:34<01:49,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 116: GPU Memory Used: 3.145 GB


Evaluating:  23%|██▎       | 117/500 [00:34<01:49,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 117: GPU Memory Used: 3.145 GB


Evaluating:  24%|██▎       | 118/500 [00:34<01:49,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 118: GPU Memory Used: 3.145 GB


Evaluating:  24%|██▍       | 119/500 [00:35<01:49,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 119: GPU Memory Used: 3.146 GB


Evaluating:  24%|██▍       | 120/500 [00:35<01:49,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 120: GPU Memory Used: 3.146 GB


Evaluating:  24%|██▍       | 121/500 [00:35<01:49,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 121: GPU Memory Used: 3.146 GB


Evaluating:  24%|██▍       | 122/500 [00:36<01:48,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 122: GPU Memory Used: 3.145 GB


Evaluating:  25%|██▍       | 123/500 [00:36<01:48,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 123: GPU Memory Used: 3.146 GB


Evaluating:  25%|██▍       | 124/500 [00:36<01:48,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 124: GPU Memory Used: 3.146 GB


Evaluating:  25%|██▌       | 125/500 [00:36<01:47,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 125: GPU Memory Used: 3.146 GB


Evaluating:  25%|██▌       | 126/500 [00:37<01:47,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 126: GPU Memory Used: 3.145 GB


Evaluating:  25%|██▌       | 127/500 [00:37<01:46,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 127: GPU Memory Used: 3.143 GB


Evaluating:  26%|██▌       | 128/500 [00:37<01:46,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 128: GPU Memory Used: 3.143 GB


Evaluating:  26%|██▌       | 129/500 [00:38<01:45,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 129: GPU Memory Used: 3.143 GB


Evaluating:  26%|██▌       | 130/500 [00:38<01:45,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 130: GPU Memory Used: 3.145 GB


Evaluating:  26%|██▌       | 131/500 [00:38<01:44,  3.52it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 131: GPU Memory Used: 3.145 GB


Evaluating:  26%|██▋       | 132/500 [00:38<01:45,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 132: GPU Memory Used: 3.145 GB


Evaluating:  27%|██▋       | 133/500 [00:39<01:44,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 133: GPU Memory Used: 3.145 GB


Evaluating:  27%|██▋       | 134/500 [00:39<01:44,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 134: GPU Memory Used: 3.145 GB


Evaluating:  27%|██▋       | 135/500 [00:39<01:44,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 135: GPU Memory Used: 3.146 GB


Evaluating:  27%|██▋       | 136/500 [00:40<01:44,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 136: GPU Memory Used: 3.146 GB


Evaluating:  27%|██▋       | 137/500 [00:40<01:44,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 137: GPU Memory Used: 3.148 GB


Evaluating:  28%|██▊       | 138/500 [00:40<01:44,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 138: GPU Memory Used: 3.146 GB


Evaluating:  28%|██▊       | 139/500 [00:40<01:44,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 139: GPU Memory Used: 3.146 GB


Evaluating:  28%|██▊       | 140/500 [00:41<01:43,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 140: GPU Memory Used: 3.145 GB


Evaluating:  28%|██▊       | 141/500 [00:41<01:42,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 141: GPU Memory Used: 3.146 GB


Evaluating:  28%|██▊       | 142/500 [00:41<01:42,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 142: GPU Memory Used: 3.146 GB


Evaluating:  29%|██▊       | 143/500 [00:42<01:42,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 143: GPU Memory Used: 3.145 GB


Evaluating:  29%|██▉       | 144/500 [00:42<01:42,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 144: GPU Memory Used: 3.148 GB


Evaluating:  29%|██▉       | 145/500 [00:42<01:42,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 145: GPU Memory Used: 3.145 GB


Evaluating:  29%|██▉       | 146/500 [00:43<01:44,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 146: GPU Memory Used: 3.148 GB


Evaluating:  29%|██▉       | 147/500 [00:43<01:42,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 147: GPU Memory Used: 3.145 GB


Evaluating:  30%|██▉       | 148/500 [00:43<01:42,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 148: GPU Memory Used: 3.145 GB


Evaluating:  30%|██▉       | 149/500 [00:43<01:41,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 149: GPU Memory Used: 3.143 GB


Evaluating:  30%|███       | 150/500 [00:44<01:40,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 150: GPU Memory Used: 3.145 GB


Evaluating:  30%|███       | 151/500 [00:44<01:41,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 151: GPU Memory Used: 3.145 GB


Evaluating:  30%|███       | 152/500 [00:44<01:40,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 152: GPU Memory Used: 3.145 GB


Evaluating:  31%|███       | 153/500 [00:45<01:39,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 153: GPU Memory Used: 3.145 GB


Evaluating:  31%|███       | 154/500 [00:45<01:39,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 154: GPU Memory Used: 3.146 GB


Evaluating:  31%|███       | 155/500 [00:45<01:39,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 155: GPU Memory Used: 3.146 GB


Evaluating:  31%|███       | 156/500 [00:45<01:38,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 156: GPU Memory Used: 3.145 GB


Evaluating:  31%|███▏      | 157/500 [00:46<01:38,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 157: GPU Memory Used: 3.145 GB


Evaluating:  32%|███▏      | 158/500 [00:46<01:37,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 158: GPU Memory Used: 3.145 GB


Evaluating:  32%|███▏      | 159/500 [00:46<01:37,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 159: GPU Memory Used: 3.145 GB


Evaluating:  32%|███▏      | 160/500 [00:47<01:36,  3.52it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 160: GPU Memory Used: 3.145 GB


Evaluating:  32%|███▏      | 161/500 [00:47<01:36,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 161: GPU Memory Used: 3.146 GB


Evaluating:  32%|███▏      | 162/500 [00:47<01:36,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 162: GPU Memory Used: 3.145 GB


Evaluating:  33%|███▎      | 163/500 [00:47<01:38,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 163: GPU Memory Used: 3.150 GB


Evaluating:  33%|███▎      | 164/500 [00:48<01:38,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 164: GPU Memory Used: 3.146 GB


Evaluating:  33%|███▎      | 165/500 [00:48<01:36,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 165: GPU Memory Used: 3.143 GB


Evaluating:  33%|███▎      | 166/500 [00:48<01:35,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 166: GPU Memory Used: 3.143 GB


Evaluating:  33%|███▎      | 167/500 [00:49<01:35,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 167: GPU Memory Used: 3.145 GB


Evaluating:  34%|███▎      | 168/500 [00:49<01:34,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 168: GPU Memory Used: 3.145 GB


Evaluating:  34%|███▍      | 169/500 [00:49<01:34,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 169: GPU Memory Used: 3.145 GB


Evaluating:  34%|███▍      | 170/500 [00:49<01:34,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 170: GPU Memory Used: 3.146 GB


Evaluating:  34%|███▍      | 171/500 [00:50<01:38,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 171: GPU Memory Used: 3.146 GB


Evaluating:  34%|███▍      | 172/500 [00:50<01:36,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 172: GPU Memory Used: 3.146 GB


Evaluating:  35%|███▍      | 173/500 [00:50<01:36,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 173: GPU Memory Used: 3.146 GB


Evaluating:  35%|███▍      | 174/500 [00:51<01:34,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 174: GPU Memory Used: 3.145 GB


Evaluating:  35%|███▌      | 175/500 [00:51<01:33,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 175: GPU Memory Used: 3.143 GB


Evaluating:  35%|███▌      | 176/500 [00:51<01:35,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 176: GPU Memory Used: 3.150 GB


Evaluating:  35%|███▌      | 177/500 [00:51<01:34,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 177: GPU Memory Used: 3.146 GB


Evaluating:  36%|███▌      | 178/500 [00:52<01:33,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 178: GPU Memory Used: 3.146 GB


Evaluating:  36%|███▌      | 179/500 [00:52<01:32,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 179: GPU Memory Used: 3.146 GB


Evaluating:  36%|███▌      | 180/500 [00:52<01:31,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 180: GPU Memory Used: 3.143 GB


Evaluating:  36%|███▌      | 181/500 [00:53<01:31,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 181: GPU Memory Used: 3.143 GB


Evaluating:  36%|███▋      | 182/500 [00:53<01:31,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 182: GPU Memory Used: 3.148 GB


Evaluating:  37%|███▋      | 183/500 [00:53<01:30,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 183: GPU Memory Used: 3.145 GB


Evaluating:  37%|███▋      | 184/500 [00:53<01:31,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 184: GPU Memory Used: 3.148 GB


Evaluating:  37%|███▋      | 185/500 [00:54<01:30,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 185: GPU Memory Used: 3.146 GB


Evaluating:  37%|███▋      | 186/500 [00:54<01:32,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 186: GPU Memory Used: 3.150 GB


Evaluating:  37%|███▋      | 187/500 [00:54<01:33,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 187: GPU Memory Used: 3.150 GB


Evaluating:  38%|███▊      | 188/500 [00:55<01:32,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 188: GPU Memory Used: 3.148 GB


Evaluating:  38%|███▊      | 189/500 [00:55<01:31,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 189: GPU Memory Used: 3.145 GB


Evaluating:  38%|███▊      | 190/500 [00:55<01:30,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 190: GPU Memory Used: 3.145 GB


Evaluating:  38%|███▊      | 191/500 [00:56<01:29,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 191: GPU Memory Used: 3.145 GB


Evaluating:  38%|███▊      | 192/500 [00:56<01:29,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 192: GPU Memory Used: 3.148 GB


Evaluating:  39%|███▊      | 193/500 [00:56<01:28,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 193: GPU Memory Used: 3.145 GB


Evaluating:  39%|███▉      | 194/500 [00:56<01:28,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 194: GPU Memory Used: 3.146 GB


Evaluating:  39%|███▉      | 195/500 [00:57<01:28,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 195: GPU Memory Used: 3.148 GB


Evaluating:  39%|███▉      | 196/500 [00:57<01:27,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 196: GPU Memory Used: 3.146 GB


Evaluating:  39%|███▉      | 197/500 [00:57<01:29,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 197: GPU Memory Used: 3.150 GB


Evaluating:  40%|███▉      | 198/500 [00:58<01:28,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 198: GPU Memory Used: 3.146 GB


Evaluating:  40%|███▉      | 199/500 [00:58<01:27,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 199: GPU Memory Used: 3.146 GB


Evaluating:  40%|████      | 200/500 [00:58<01:26,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 200: GPU Memory Used: 3.145 GB


Evaluating:  40%|████      | 201/500 [00:58<01:26,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 201: GPU Memory Used: 3.146 GB


Evaluating:  40%|████      | 202/500 [00:59<01:25,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 202: GPU Memory Used: 3.145 GB


Evaluating:  41%|████      | 203/500 [00:59<01:25,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 203: GPU Memory Used: 3.143 GB


Evaluating:  41%|████      | 204/500 [00:59<01:24,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 204: GPU Memory Used: 3.145 GB


Evaluating:  41%|████      | 205/500 [01:00<01:24,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 205: GPU Memory Used: 3.145 GB


Evaluating:  41%|████      | 206/500 [01:00<01:24,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 206: GPU Memory Used: 3.146 GB


Evaluating:  41%|████▏     | 207/500 [01:00<01:24,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 207: GPU Memory Used: 3.148 GB


Evaluating:  42%|████▏     | 208/500 [01:00<01:25,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 208: GPU Memory Used: 3.148 GB


Evaluating:  42%|████▏     | 209/500 [01:01<01:24,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 209: GPU Memory Used: 3.146 GB


Evaluating:  42%|████▏     | 210/500 [01:01<01:24,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 210: GPU Memory Used: 3.145 GB


Evaluating:  42%|████▏     | 211/500 [01:01<01:23,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 211: GPU Memory Used: 3.145 GB


Evaluating:  42%|████▏     | 212/500 [01:02<01:25,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 212: GPU Memory Used: 3.148 GB


Evaluating:  43%|████▎     | 213/500 [01:02<01:24,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 213: GPU Memory Used: 3.145 GB


Evaluating:  43%|████▎     | 214/500 [01:02<01:25,  3.34it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 214: GPU Memory Used: 3.148 GB


Evaluating:  43%|████▎     | 215/500 [01:03<01:26,  3.30it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 215: GPU Memory Used: 3.148 GB


Evaluating:  43%|████▎     | 216/500 [01:03<01:24,  3.36it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 216: GPU Memory Used: 3.146 GB


Evaluating:  43%|████▎     | 217/500 [01:03<01:23,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 217: GPU Memory Used: 3.145 GB


Evaluating:  44%|████▎     | 218/500 [01:03<01:22,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 218: GPU Memory Used: 3.146 GB


Evaluating:  44%|████▍     | 219/500 [01:04<01:20,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 219: GPU Memory Used: 3.145 GB


Evaluating:  44%|████▍     | 220/500 [01:04<01:19,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 220: GPU Memory Used: 3.146 GB


Evaluating:  44%|████▍     | 221/500 [01:04<01:19,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 221: GPU Memory Used: 3.145 GB


Evaluating:  44%|████▍     | 222/500 [01:05<01:19,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 222: GPU Memory Used: 3.146 GB


Evaluating:  45%|████▍     | 223/500 [01:05<01:19,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 223: GPU Memory Used: 3.145 GB


Evaluating:  45%|████▍     | 224/500 [01:05<01:19,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 224: GPU Memory Used: 3.146 GB


Evaluating:  45%|████▌     | 225/500 [01:05<01:18,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 225: GPU Memory Used: 3.145 GB


Evaluating:  45%|████▌     | 226/500 [01:06<01:18,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 226: GPU Memory Used: 3.146 GB


Evaluating:  45%|████▌     | 227/500 [01:06<01:20,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 227: GPU Memory Used: 3.148 GB


Evaluating:  46%|████▌     | 228/500 [01:06<01:19,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 228: GPU Memory Used: 3.146 GB


Evaluating:  46%|████▌     | 229/500 [01:07<01:18,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 229: GPU Memory Used: 3.145 GB


Evaluating:  46%|████▌     | 230/500 [01:07<01:18,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 230: GPU Memory Used: 3.146 GB


Evaluating:  46%|████▌     | 231/500 [01:07<01:19,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 231: GPU Memory Used: 3.145 GB


Evaluating:  46%|████▋     | 232/500 [01:08<01:22,  3.26it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 232: GPU Memory Used: 3.168 GB


Evaluating:  47%|████▋     | 233/500 [01:08<01:20,  3.33it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 233: GPU Memory Used: 3.145 GB


Evaluating:  47%|████▋     | 234/500 [01:08<01:18,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 234: GPU Memory Used: 3.143 GB


Evaluating:  47%|████▋     | 235/500 [01:08<01:17,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 235: GPU Memory Used: 3.145 GB


Evaluating:  47%|████▋     | 236/500 [01:09<01:16,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 236: GPU Memory Used: 3.145 GB


Evaluating:  47%|████▋     | 237/500 [01:09<01:16,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 237: GPU Memory Used: 3.146 GB


Evaluating:  48%|████▊     | 238/500 [01:09<01:15,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 238: GPU Memory Used: 3.146 GB


Evaluating:  48%|████▊     | 239/500 [01:10<01:16,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 239: GPU Memory Used: 3.148 GB


Evaluating:  48%|████▊     | 240/500 [01:10<01:16,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 240: GPU Memory Used: 3.145 GB


Evaluating:  48%|████▊     | 241/500 [01:10<01:15,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 241: GPU Memory Used: 3.145 GB


Evaluating:  48%|████▊     | 242/500 [01:10<01:14,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 242: GPU Memory Used: 3.146 GB


Evaluating:  49%|████▊     | 243/500 [01:11<01:14,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 243: GPU Memory Used: 3.146 GB


Evaluating:  49%|████▉     | 244/500 [01:11<01:13,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 244: GPU Memory Used: 3.145 GB


Evaluating:  49%|████▉     | 245/500 [01:11<01:13,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 245: GPU Memory Used: 3.148 GB


Evaluating:  49%|████▉     | 246/500 [01:12<01:13,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 246: GPU Memory Used: 3.145 GB


Evaluating:  49%|████▉     | 247/500 [01:12<01:12,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 247: GPU Memory Used: 3.145 GB


Evaluating:  50%|████▉     | 248/500 [01:12<01:12,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 248: GPU Memory Used: 3.146 GB


Evaluating:  50%|████▉     | 249/500 [01:12<01:12,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 249: GPU Memory Used: 3.145 GB


Evaluating:  50%|█████     | 250/500 [01:13<01:11,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 250: GPU Memory Used: 3.143 GB


Evaluating:  50%|█████     | 251/500 [01:13<01:11,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 251: GPU Memory Used: 3.145 GB


Evaluating:  50%|█████     | 252/500 [01:13<01:11,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 252: GPU Memory Used: 3.148 GB


Evaluating:  51%|█████     | 253/500 [01:14<01:10,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 253: GPU Memory Used: 3.146 GB


Evaluating:  51%|█████     | 254/500 [01:14<01:10,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 254: GPU Memory Used: 3.145 GB


Evaluating:  51%|█████     | 255/500 [01:14<01:10,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 255: GPU Memory Used: 3.146 GB


Evaluating:  51%|█████     | 256/500 [01:14<01:09,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 256: GPU Memory Used: 3.145 GB


Evaluating:  51%|█████▏    | 257/500 [01:15<01:09,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 257: GPU Memory Used: 3.143 GB


Evaluating:  52%|█████▏    | 258/500 [01:15<01:10,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 258: GPU Memory Used: 3.150 GB


Evaluating:  52%|█████▏    | 259/500 [01:15<01:10,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 259: GPU Memory Used: 3.146 GB


Evaluating:  52%|█████▏    | 260/500 [01:16<01:09,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 260: GPU Memory Used: 3.146 GB


Evaluating:  52%|█████▏    | 261/500 [01:16<01:09,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 261: GPU Memory Used: 3.146 GB


Evaluating:  52%|█████▏    | 262/500 [01:16<01:08,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 262: GPU Memory Used: 3.145 GB


Evaluating:  53%|█████▎    | 263/500 [01:16<01:08,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 263: GPU Memory Used: 3.146 GB


Evaluating:  53%|█████▎    | 264/500 [01:17<01:07,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 264: GPU Memory Used: 3.145 GB


Evaluating:  53%|█████▎    | 265/500 [01:17<01:07,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 265: GPU Memory Used: 3.146 GB


Evaluating:  53%|█████▎    | 266/500 [01:17<01:06,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 266: GPU Memory Used: 3.146 GB


Evaluating:  53%|█████▎    | 267/500 [01:18<01:06,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 267: GPU Memory Used: 3.145 GB


Evaluating:  54%|█████▎    | 268/500 [01:18<01:06,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 268: GPU Memory Used: 3.145 GB


Evaluating:  54%|█████▍    | 269/500 [01:18<01:06,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 269: GPU Memory Used: 3.148 GB


Evaluating:  54%|█████▍    | 270/500 [01:18<01:09,  3.32it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 270: GPU Memory Used: 3.146 GB


Evaluating:  54%|█████▍    | 271/500 [01:19<01:08,  3.36it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 271: GPU Memory Used: 3.145 GB


Evaluating:  54%|█████▍    | 272/500 [01:19<01:06,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 272: GPU Memory Used: 3.143 GB


Evaluating:  55%|█████▍    | 273/500 [01:19<01:05,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 273: GPU Memory Used: 3.145 GB


Evaluating:  55%|█████▍    | 274/500 [01:20<01:05,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 274: GPU Memory Used: 3.145 GB


Evaluating:  55%|█████▌    | 275/500 [01:20<01:04,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 275: GPU Memory Used: 3.145 GB


Evaluating:  55%|█████▌    | 276/500 [01:20<01:04,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 276: GPU Memory Used: 3.148 GB


Evaluating:  55%|█████▌    | 277/500 [01:20<01:04,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 277: GPU Memory Used: 3.146 GB


Evaluating:  56%|█████▌    | 278/500 [01:21<01:03,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 278: GPU Memory Used: 3.145 GB


Evaluating:  56%|█████▌    | 279/500 [01:21<01:03,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 279: GPU Memory Used: 3.148 GB


Evaluating:  56%|█████▌    | 280/500 [01:21<01:03,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 280: GPU Memory Used: 3.145 GB


Evaluating:  56%|█████▌    | 281/500 [01:22<01:03,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 281: GPU Memory Used: 3.145 GB


Evaluating:  56%|█████▋    | 282/500 [01:22<01:02,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 282: GPU Memory Used: 3.148 GB


Evaluating:  57%|█████▋    | 283/500 [01:22<01:02,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 283: GPU Memory Used: 3.146 GB


Evaluating:  57%|█████▋    | 284/500 [01:22<01:02,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 284: GPU Memory Used: 3.145 GB


Evaluating:  57%|█████▋    | 285/500 [01:23<01:03,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 285: GPU Memory Used: 3.148 GB


Evaluating:  57%|█████▋    | 286/500 [01:23<01:02,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 286: GPU Memory Used: 3.145 GB


Evaluating:  57%|█████▋    | 287/500 [01:23<01:01,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 287: GPU Memory Used: 3.146 GB


Evaluating:  58%|█████▊    | 288/500 [01:24<01:01,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 288: GPU Memory Used: 3.146 GB


Evaluating:  58%|█████▊    | 289/500 [01:24<01:00,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 289: GPU Memory Used: 3.145 GB


Evaluating:  58%|█████▊    | 290/500 [01:24<01:00,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 290: GPU Memory Used: 3.148 GB


Evaluating:  58%|█████▊    | 291/500 [01:25<01:01,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 291: GPU Memory Used: 3.150 GB


Evaluating:  58%|█████▊    | 292/500 [01:25<01:02,  3.33it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 292: GPU Memory Used: 3.148 GB


Evaluating:  59%|█████▊    | 293/500 [01:25<01:01,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 293: GPU Memory Used: 3.145 GB


Evaluating:  59%|█████▉    | 294/500 [01:25<01:00,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 294: GPU Memory Used: 3.146 GB


Evaluating:  59%|█████▉    | 295/500 [01:26<00:59,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 295: GPU Memory Used: 3.145 GB


Evaluating:  59%|█████▉    | 296/500 [01:26<00:58,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 296: GPU Memory Used: 3.145 GB


Evaluating:  59%|█████▉    | 297/500 [01:26<00:58,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 297: GPU Memory Used: 3.145 GB


Evaluating:  60%|█████▉    | 298/500 [01:27<00:58,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 298: GPU Memory Used: 3.145 GB


Evaluating:  60%|█████▉    | 299/500 [01:27<00:59,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 299: GPU Memory Used: 3.150 GB


Evaluating:  60%|██████    | 300/500 [01:27<01:00,  3.33it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 300: GPU Memory Used: 3.150 GB


Evaluating:  60%|██████    | 301/500 [01:27<00:58,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 301: GPU Memory Used: 3.145 GB


Evaluating:  60%|██████    | 302/500 [01:28<00:58,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 302: GPU Memory Used: 3.146 GB


Evaluating:  61%|██████    | 303/500 [01:28<00:57,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 303: GPU Memory Used: 3.145 GB


Evaluating:  61%|██████    | 304/500 [01:28<00:56,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 304: GPU Memory Used: 3.146 GB


Evaluating:  61%|██████    | 305/500 [01:29<00:56,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 305: GPU Memory Used: 3.145 GB


Evaluating:  61%|██████    | 306/500 [01:29<00:55,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 306: GPU Memory Used: 3.145 GB


Evaluating:  61%|██████▏   | 307/500 [01:29<00:55,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 307: GPU Memory Used: 3.146 GB


Evaluating:  62%|██████▏   | 308/500 [01:29<00:55,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 308: GPU Memory Used: 3.145 GB


Evaluating:  62%|██████▏   | 309/500 [01:30<00:54,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 309: GPU Memory Used: 3.145 GB


Evaluating:  62%|██████▏   | 310/500 [01:30<00:54,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 310: GPU Memory Used: 3.146 GB


Evaluating:  62%|██████▏   | 311/500 [01:30<00:55,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 311: GPU Memory Used: 3.146 GB


Evaluating:  62%|██████▏   | 312/500 [01:31<00:55,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 312: GPU Memory Used: 3.148 GB


Evaluating:  63%|██████▎   | 313/500 [01:31<00:54,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 313: GPU Memory Used: 3.143 GB


Evaluating:  63%|██████▎   | 314/500 [01:31<00:53,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 314: GPU Memory Used: 3.146 GB


Evaluating:  63%|██████▎   | 315/500 [01:32<00:53,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 315: GPU Memory Used: 3.146 GB


Evaluating:  63%|██████▎   | 316/500 [01:32<00:53,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 316: GPU Memory Used: 3.146 GB


Evaluating:  63%|██████▎   | 317/500 [01:32<00:52,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 317: GPU Memory Used: 3.145 GB


Evaluating:  64%|██████▎   | 318/500 [01:32<00:52,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 318: GPU Memory Used: 3.146 GB


Evaluating:  64%|██████▍   | 319/500 [01:33<00:53,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 319: GPU Memory Used: 3.152 GB


Evaluating:  64%|██████▍   | 320/500 [01:33<00:52,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 320: GPU Memory Used: 3.143 GB


Evaluating:  64%|██████▍   | 321/500 [01:33<00:53,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 321: GPU Memory Used: 3.152 GB


Evaluating:  64%|██████▍   | 322/500 [01:34<00:52,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 322: GPU Memory Used: 3.145 GB


Evaluating:  65%|██████▍   | 323/500 [01:34<00:51,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 323: GPU Memory Used: 3.146 GB


Evaluating:  65%|██████▍   | 324/500 [01:34<00:51,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 324: GPU Memory Used: 3.145 GB


Evaluating:  65%|██████▌   | 325/500 [01:34<00:51,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 325: GPU Memory Used: 3.145 GB


Evaluating:  65%|██████▌   | 326/500 [01:35<00:51,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 326: GPU Memory Used: 3.148 GB


Evaluating:  65%|██████▌   | 327/500 [01:35<00:50,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 327: GPU Memory Used: 3.143 GB


Evaluating:  66%|██████▌   | 328/500 [01:35<00:50,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 328: GPU Memory Used: 3.145 GB


Evaluating:  66%|██████▌   | 329/500 [01:36<00:49,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 329: GPU Memory Used: 3.145 GB


Evaluating:  66%|██████▌   | 330/500 [01:36<00:49,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 330: GPU Memory Used: 3.145 GB


Evaluating:  66%|██████▌   | 331/500 [01:36<00:48,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 331: GPU Memory Used: 3.146 GB


Evaluating:  66%|██████▋   | 332/500 [01:37<00:49,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 332: GPU Memory Used: 3.150 GB


Evaluating:  67%|██████▋   | 333/500 [01:37<00:48,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 333: GPU Memory Used: 3.145 GB


Evaluating:  67%|██████▋   | 334/500 [01:37<00:48,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 334: GPU Memory Used: 3.146 GB


Evaluating:  67%|██████▋   | 335/500 [01:37<00:47,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 335: GPU Memory Used: 3.146 GB


Evaluating:  67%|██████▋   | 336/500 [01:38<00:47,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 336: GPU Memory Used: 3.146 GB


Evaluating:  67%|██████▋   | 337/500 [01:38<00:47,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 337: GPU Memory Used: 3.146 GB


Evaluating:  68%|██████▊   | 338/500 [01:38<00:46,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 338: GPU Memory Used: 3.148 GB


Evaluating:  68%|██████▊   | 339/500 [01:39<00:46,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 339: GPU Memory Used: 3.146 GB


Evaluating:  68%|██████▊   | 340/500 [01:39<00:46,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 340: GPU Memory Used: 3.146 GB


Evaluating:  68%|██████▊   | 341/500 [01:39<00:45,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 341: GPU Memory Used: 3.145 GB


Evaluating:  68%|██████▊   | 342/500 [01:39<00:45,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 342: GPU Memory Used: 3.145 GB


Evaluating:  69%|██████▊   | 343/500 [01:40<00:44,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 343: GPU Memory Used: 3.145 GB


Evaluating:  69%|██████▉   | 344/500 [01:40<00:45,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 344: GPU Memory Used: 3.150 GB


Evaluating:  69%|██████▉   | 345/500 [01:40<00:46,  3.36it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 345: GPU Memory Used: 3.148 GB


Evaluating:  69%|██████▉   | 346/500 [01:41<00:45,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 346: GPU Memory Used: 3.148 GB


Evaluating:  69%|██████▉   | 347/500 [01:41<00:44,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 347: GPU Memory Used: 3.145 GB


Evaluating:  70%|██████▉   | 348/500 [01:41<00:44,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 348: GPU Memory Used: 3.145 GB


Evaluating:  70%|██████▉   | 349/500 [01:41<00:43,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 349: GPU Memory Used: 3.143 GB


Evaluating:  70%|███████   | 350/500 [01:42<00:43,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 350: GPU Memory Used: 3.145 GB


Evaluating:  70%|███████   | 351/500 [01:42<00:42,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 351: GPU Memory Used: 3.145 GB


Evaluating:  70%|███████   | 352/500 [01:42<00:42,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 352: GPU Memory Used: 3.148 GB


Evaluating:  71%|███████   | 353/500 [01:43<00:42,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 353: GPU Memory Used: 3.145 GB


Evaluating:  71%|███████   | 354/500 [01:43<00:41,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 354: GPU Memory Used: 3.145 GB


Evaluating:  71%|███████   | 355/500 [01:43<00:41,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 355: GPU Memory Used: 3.145 GB


Evaluating:  71%|███████   | 356/500 [01:43<00:41,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 356: GPU Memory Used: 3.145 GB


Evaluating:  71%|███████▏  | 357/500 [01:44<00:41,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 357: GPU Memory Used: 3.148 GB


Evaluating:  72%|███████▏  | 358/500 [01:44<00:41,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 358: GPU Memory Used: 3.148 GB


Evaluating:  72%|███████▏  | 359/500 [01:44<00:40,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 359: GPU Memory Used: 3.145 GB


Evaluating:  72%|███████▏  | 360/500 [01:45<00:40,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 360: GPU Memory Used: 3.145 GB


Evaluating:  72%|███████▏  | 361/500 [01:45<00:40,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 361: GPU Memory Used: 3.148 GB


Evaluating:  72%|███████▏  | 362/500 [01:45<00:39,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 362: GPU Memory Used: 3.146 GB


Evaluating:  73%|███████▎  | 363/500 [01:45<00:39,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 363: GPU Memory Used: 3.145 GB


Evaluating:  73%|███████▎  | 364/500 [01:46<00:38,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 364: GPU Memory Used: 3.145 GB


Evaluating:  73%|███████▎  | 365/500 [01:46<00:39,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 365: GPU Memory Used: 3.150 GB


Evaluating:  73%|███████▎  | 366/500 [01:46<00:38,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 366: GPU Memory Used: 3.145 GB


Evaluating:  73%|███████▎  | 367/500 [01:47<00:38,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 367: GPU Memory Used: 3.145 GB


Evaluating:  74%|███████▎  | 368/500 [01:47<00:37,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 368: GPU Memory Used: 3.145 GB


Evaluating:  74%|███████▍  | 369/500 [01:47<00:37,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 369: GPU Memory Used: 3.146 GB


Evaluating:  74%|███████▍  | 370/500 [01:47<00:37,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 370: GPU Memory Used: 3.145 GB


Evaluating:  74%|███████▍  | 371/500 [01:48<00:37,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 371: GPU Memory Used: 3.146 GB


Evaluating:  74%|███████▍  | 372/500 [01:48<00:36,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 372: GPU Memory Used: 3.145 GB


Evaluating:  75%|███████▍  | 373/500 [01:48<00:36,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 373: GPU Memory Used: 3.146 GB


Evaluating:  75%|███████▍  | 374/500 [01:49<00:36,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 374: GPU Memory Used: 3.146 GB


Evaluating:  75%|███████▌  | 375/500 [01:49<00:37,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 375: GPU Memory Used: 3.146 GB


Evaluating:  75%|███████▌  | 376/500 [01:49<00:36,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 376: GPU Memory Used: 3.146 GB


Evaluating:  75%|███████▌  | 377/500 [01:50<00:35,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 377: GPU Memory Used: 3.145 GB


Evaluating:  76%|███████▌  | 378/500 [01:50<00:35,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 378: GPU Memory Used: 3.143 GB


Evaluating:  76%|███████▌  | 379/500 [01:50<00:35,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 379: GPU Memory Used: 3.148 GB


Evaluating:  76%|███████▌  | 380/500 [01:50<00:34,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 380: GPU Memory Used: 3.145 GB


Evaluating:  76%|███████▌  | 381/500 [01:51<00:34,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 381: GPU Memory Used: 3.146 GB


Evaluating:  76%|███████▋  | 382/500 [01:51<00:33,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 382: GPU Memory Used: 3.145 GB


Evaluating:  77%|███████▋  | 383/500 [01:51<00:33,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 383: GPU Memory Used: 3.146 GB


Evaluating:  77%|███████▋  | 384/500 [01:52<00:33,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 384: GPU Memory Used: 3.146 GB


Evaluating:  77%|███████▋  | 385/500 [01:52<00:32,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 385: GPU Memory Used: 3.146 GB


Evaluating:  77%|███████▋  | 386/500 [01:52<00:32,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 386: GPU Memory Used: 3.143 GB


Evaluating:  77%|███████▋  | 387/500 [01:52<00:32,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 387: GPU Memory Used: 3.143 GB


Evaluating:  78%|███████▊  | 388/500 [01:53<00:32,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 388: GPU Memory Used: 3.148 GB


Evaluating:  78%|███████▊  | 389/500 [01:53<00:32,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 389: GPU Memory Used: 3.145 GB


Evaluating:  78%|███████▊  | 390/500 [01:53<00:31,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 390: GPU Memory Used: 3.146 GB


Evaluating:  78%|███████▊  | 391/500 [01:54<00:31,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 391: GPU Memory Used: 3.146 GB


Evaluating:  78%|███████▊  | 392/500 [01:54<00:31,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 392: GPU Memory Used: 3.150 GB


Evaluating:  79%|███████▊  | 393/500 [01:54<00:32,  3.33it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 393: GPU Memory Used: 3.150 GB


Evaluating:  79%|███████▉  | 394/500 [01:54<00:31,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 394: GPU Memory Used: 3.146 GB


Evaluating:  79%|███████▉  | 395/500 [01:55<00:31,  3.34it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 395: GPU Memory Used: 3.148 GB


Evaluating:  79%|███████▉  | 396/500 [01:55<00:30,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 396: GPU Memory Used: 3.145 GB


Evaluating:  79%|███████▉  | 397/500 [01:55<00:30,  3.34it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 397: GPU Memory Used: 3.148 GB


Evaluating:  80%|███████▉  | 398/500 [01:56<00:30,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 398: GPU Memory Used: 3.145 GB


Evaluating:  80%|███████▉  | 399/500 [01:56<00:29,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 399: GPU Memory Used: 3.148 GB


Evaluating:  80%|████████  | 400/500 [01:56<00:29,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 400: GPU Memory Used: 3.148 GB


Evaluating:  80%|████████  | 401/500 [01:56<00:28,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 401: GPU Memory Used: 3.143 GB


Evaluating:  80%|████████  | 402/500 [01:57<00:28,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 402: GPU Memory Used: 3.148 GB


Evaluating:  81%|████████  | 403/500 [01:57<00:28,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 403: GPU Memory Used: 3.148 GB


Evaluating:  81%|████████  | 404/500 [01:57<00:27,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 404: GPU Memory Used: 3.146 GB


Evaluating:  81%|████████  | 405/500 [01:58<00:27,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 405: GPU Memory Used: 3.148 GB


Evaluating:  81%|████████  | 406/500 [01:58<00:27,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 406: GPU Memory Used: 3.148 GB


Evaluating:  81%|████████▏ | 407/500 [01:58<00:26,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 407: GPU Memory Used: 3.146 GB


Evaluating:  82%|████████▏ | 408/500 [01:59<00:26,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 408: GPU Memory Used: 3.146 GB


Evaluating:  82%|████████▏ | 409/500 [01:59<00:26,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 409: GPU Memory Used: 3.148 GB


Evaluating:  82%|████████▏ | 410/500 [01:59<00:26,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 410: GPU Memory Used: 3.146 GB


Evaluating:  82%|████████▏ | 411/500 [01:59<00:25,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 411: GPU Memory Used: 3.148 GB


Evaluating:  82%|████████▏ | 412/500 [02:00<00:25,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 412: GPU Memory Used: 3.145 GB


Evaluating:  83%|████████▎ | 413/500 [02:00<00:25,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 413: GPU Memory Used: 3.145 GB


Evaluating:  83%|████████▎ | 414/500 [02:00<00:24,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 414: GPU Memory Used: 3.148 GB


Evaluating:  83%|████████▎ | 415/500 [02:01<00:24,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 415: GPU Memory Used: 3.146 GB


Evaluating:  83%|████████▎ | 416/500 [02:01<00:24,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 416: GPU Memory Used: 3.148 GB


Evaluating:  83%|████████▎ | 417/500 [02:01<00:24,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 417: GPU Memory Used: 3.146 GB


Evaluating:  84%|████████▎ | 418/500 [02:01<00:23,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 418: GPU Memory Used: 3.145 GB


Evaluating:  84%|████████▍ | 419/500 [02:02<00:23,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 419: GPU Memory Used: 3.145 GB


Evaluating:  84%|████████▍ | 420/500 [02:02<00:23,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 420: GPU Memory Used: 3.148 GB


Evaluating:  84%|████████▍ | 421/500 [02:02<00:22,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 421: GPU Memory Used: 3.143 GB


Evaluating:  84%|████████▍ | 422/500 [02:03<00:22,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 422: GPU Memory Used: 3.148 GB


Evaluating:  85%|████████▍ | 423/500 [02:03<00:22,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 423: GPU Memory Used: 3.148 GB


Evaluating:  85%|████████▍ | 424/500 [02:03<00:22,  3.36it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 424: GPU Memory Used: 3.148 GB


Evaluating:  85%|████████▌ | 425/500 [02:03<00:22,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 425: GPU Memory Used: 3.146 GB


Evaluating:  85%|████████▌ | 426/500 [02:04<00:22,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 426: GPU Memory Used: 3.148 GB


Evaluating:  85%|████████▌ | 427/500 [02:04<00:21,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 427: GPU Memory Used: 3.146 GB


Evaluating:  86%|████████▌ | 428/500 [02:04<00:20,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 428: GPU Memory Used: 3.143 GB


Evaluating:  86%|████████▌ | 429/500 [02:05<00:21,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 429: GPU Memory Used: 3.148 GB


Evaluating:  86%|████████▌ | 430/500 [02:05<00:20,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 430: GPU Memory Used: 3.145 GB


Evaluating:  86%|████████▌ | 431/500 [02:05<00:20,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 431: GPU Memory Used: 3.143 GB


Evaluating:  86%|████████▋ | 432/500 [02:06<00:20,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 432: GPU Memory Used: 3.152 GB


Evaluating:  87%|████████▋ | 433/500 [02:06<00:19,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 433: GPU Memory Used: 3.146 GB


Evaluating:  87%|████████▋ | 434/500 [02:06<00:19,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 434: GPU Memory Used: 3.146 GB


Evaluating:  87%|████████▋ | 435/500 [02:06<00:19,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 435: GPU Memory Used: 3.152 GB


Evaluating:  87%|████████▋ | 436/500 [02:07<00:19,  3.31it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 436: GPU Memory Used: 3.152 GB


Evaluating:  87%|████████▋ | 437/500 [02:07<00:19,  3.29it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 437: GPU Memory Used: 3.152 GB


Evaluating:  88%|████████▊ | 438/500 [02:07<00:18,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 438: GPU Memory Used: 3.143 GB


Evaluating:  88%|████████▊ | 439/500 [02:08<00:18,  3.32it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 439: GPU Memory Used: 3.152 GB


Evaluating:  88%|████████▊ | 440/500 [02:08<00:17,  3.36it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 440: GPU Memory Used: 3.148 GB


Evaluating:  88%|████████▊ | 441/500 [02:08<00:17,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 441: GPU Memory Used: 3.145 GB


Evaluating:  88%|████████▊ | 442/500 [02:08<00:16,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 442: GPU Memory Used: 3.143 GB


Evaluating:  89%|████████▊ | 443/500 [02:09<00:16,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 443: GPU Memory Used: 3.146 GB


Evaluating:  89%|████████▉ | 444/500 [02:09<00:16,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 444: GPU Memory Used: 3.146 GB


Evaluating:  89%|████████▉ | 445/500 [02:09<00:15,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 445: GPU Memory Used: 3.146 GB


Evaluating:  89%|████████▉ | 446/500 [02:10<00:15,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 446: GPU Memory Used: 3.145 GB


Evaluating:  89%|████████▉ | 447/500 [02:10<00:15,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 447: GPU Memory Used: 3.145 GB


Evaluating:  90%|████████▉ | 448/500 [02:10<00:14,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 448: GPU Memory Used: 3.145 GB


Evaluating:  90%|████████▉ | 449/500 [02:11<00:14,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 449: GPU Memory Used: 3.148 GB


Evaluating:  90%|█████████ | 450/500 [02:11<00:14,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 450: GPU Memory Used: 3.146 GB


Evaluating:  90%|█████████ | 451/500 [02:11<00:14,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 451: GPU Memory Used: 3.148 GB


Evaluating:  90%|█████████ | 452/500 [02:11<00:13,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 452: GPU Memory Used: 3.143 GB


Evaluating:  91%|█████████ | 453/500 [02:12<00:13,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 453: GPU Memory Used: 3.145 GB


Evaluating:  91%|█████████ | 454/500 [02:12<00:13,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 454: GPU Memory Used: 3.146 GB


Evaluating:  91%|█████████ | 455/500 [02:12<00:13,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 455: GPU Memory Used: 3.148 GB


Evaluating:  91%|█████████ | 456/500 [02:13<00:13,  3.36it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 456: GPU Memory Used: 3.150 GB


Evaluating:  91%|█████████▏| 457/500 [02:13<00:12,  3.40it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 457: GPU Memory Used: 3.145 GB


Evaluating:  92%|█████████▏| 458/500 [02:13<00:12,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 458: GPU Memory Used: 3.146 GB


Evaluating:  92%|█████████▏| 459/500 [02:13<00:12,  3.35it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 459: GPU Memory Used: 3.145 GB


Evaluating:  92%|█████████▏| 460/500 [02:14<00:11,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 460: GPU Memory Used: 3.146 GB


Evaluating:  92%|█████████▏| 461/500 [02:14<00:11,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 461: GPU Memory Used: 3.143 GB


Evaluating:  92%|█████████▏| 462/500 [02:14<00:11,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 462: GPU Memory Used: 3.150 GB


Evaluating:  93%|█████████▎| 463/500 [02:15<00:11,  3.31it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 463: GPU Memory Used: 3.148 GB


Evaluating:  93%|█████████▎| 464/500 [02:15<00:10,  3.36it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 464: GPU Memory Used: 3.146 GB


Evaluating:  93%|█████████▎| 465/500 [02:15<00:10,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 465: GPU Memory Used: 3.145 GB


Evaluating:  93%|█████████▎| 466/500 [02:16<00:09,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 466: GPU Memory Used: 3.146 GB


Evaluating:  93%|█████████▎| 467/500 [02:16<00:09,  3.37it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 467: GPU Memory Used: 3.148 GB


Evaluating:  94%|█████████▎| 468/500 [02:16<00:09,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 468: GPU Memory Used: 3.146 GB


Evaluating:  94%|█████████▍| 469/500 [02:16<00:09,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 469: GPU Memory Used: 3.146 GB


Evaluating:  94%|█████████▍| 470/500 [02:17<00:08,  3.38it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 470: GPU Memory Used: 3.148 GB


Evaluating:  94%|█████████▍| 471/500 [02:17<00:08,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 471: GPU Memory Used: 3.146 GB


Evaluating:  94%|█████████▍| 472/500 [02:17<00:08,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 472: GPU Memory Used: 3.145 GB


Evaluating:  95%|█████████▍| 473/500 [02:18<00:07,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 473: GPU Memory Used: 3.146 GB


Evaluating:  95%|█████████▍| 474/500 [02:18<00:07,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 474: GPU Memory Used: 3.145 GB


Evaluating:  95%|█████████▌| 475/500 [02:18<00:07,  3.41it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 475: GPU Memory Used: 3.152 GB


Evaluating:  95%|█████████▌| 476/500 [02:18<00:06,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 476: GPU Memory Used: 3.145 GB


Evaluating:  95%|█████████▌| 477/500 [02:19<00:06,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 477: GPU Memory Used: 3.145 GB


Evaluating:  96%|█████████▌| 478/500 [02:19<00:06,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 478: GPU Memory Used: 3.148 GB


Evaluating:  96%|█████████▌| 479/500 [02:19<00:06,  3.48it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 479: GPU Memory Used: 3.145 GB


Evaluating:  96%|█████████▌| 480/500 [02:20<00:05,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 480: GPU Memory Used: 3.146 GB


Evaluating:  96%|█████████▌| 481/500 [02:20<00:05,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 481: GPU Memory Used: 3.145 GB


Evaluating:  96%|█████████▋| 482/500 [02:20<00:05,  3.51it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 482: GPU Memory Used: 3.143 GB


Evaluating:  97%|█████████▋| 483/500 [02:20<00:04,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 483: GPU Memory Used: 3.148 GB


Evaluating:  97%|█████████▋| 484/500 [02:21<00:04,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 484: GPU Memory Used: 3.150 GB


Evaluating:  97%|█████████▋| 485/500 [02:21<00:04,  3.44it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 485: GPU Memory Used: 3.146 GB


Evaluating:  97%|█████████▋| 486/500 [02:21<00:04,  3.45it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 486: GPU Memory Used: 3.146 GB


Evaluating:  97%|█████████▋| 487/500 [02:22<00:03,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 487: GPU Memory Used: 3.145 GB


Evaluating:  98%|█████████▊| 488/500 [02:22<00:03,  3.46it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 488: GPU Memory Used: 3.145 GB


Evaluating:  98%|█████████▊| 489/500 [02:22<00:03,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 489: GPU Memory Used: 3.146 GB


Evaluating:  98%|█████████▊| 490/500 [02:22<00:02,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 490: GPU Memory Used: 3.145 GB


Evaluating:  98%|█████████▊| 491/500 [02:23<00:02,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 491: GPU Memory Used: 3.148 GB


Evaluating:  98%|█████████▊| 492/500 [02:23<00:02,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 492: GPU Memory Used: 3.146 GB


Evaluating:  99%|█████████▊| 493/500 [02:23<00:02,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 493: GPU Memory Used: 3.145 GB


Evaluating:  99%|█████████▉| 494/500 [02:24<00:01,  3.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 494: GPU Memory Used: 3.146 GB


Evaluating:  99%|█████████▉| 495/500 [02:24<00:01,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 495: GPU Memory Used: 3.145 GB


Evaluating:  99%|█████████▉| 496/500 [02:24<00:01,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 496: GPU Memory Used: 3.145 GB


Evaluating:  99%|█████████▉| 497/500 [02:24<00:00,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 497: GPU Memory Used: 3.146 GB


Evaluating: 100%|█████████▉| 498/500 [02:25<00:00,  3.50it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 498: GPU Memory Used: 3.143 GB


Evaluating: 100%|█████████▉| 499/500 [02:25<00:00,  3.42it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Sample 499: GPU Memory Used: 3.150 GB


Evaluating: 100%|██████████| 500/500 [02:25<00:00,  3.43it/s]

Sample 500: GPU Memory Used: 3.148 GB
Avg latency: 0.079 sec
Tokens per sec: 13.05
Avg perplexity: nan
BLEU Score: 0.000
ROUGE-1 Score: 0.000
ROUGE-L Score: 0.000
Memory usage (GB): 3.146
FLOP Reduction (%): 0.00
Retrieval Latency (sec): 0.000
F1 Score: 0.000
Knowledge Retention: 0.000
Memory Reduction (%): 0.00
Query Processing Time (sec): 0.079
Accuracy Drop: 0.000
Compression Ratio: 1.00



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
