In [8]:
import argparse
from typing import Optional, Union

import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from dataclasses import dataclass

import datasets
from datasets import Dataset
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import log_loss

from transformers import (
    AutoTokenizer,
    AutoConfig,
    EarlyStoppingCallback,
    AutoModelForCausalLM,
    AutoModelForMultipleChoice,
    TrainingArguments,
    Trainer,
    RobertaForMultipleChoice,
    AutoModelForSequenceClassification,
    LlamaModel,
    LlamaForSequenceClassification,
    BitsAndBytesConfig,
    get_polynomial_decay_schedule_with_warmup,
    get_cosine_schedule_with_warmup,
    TrainerCallback,
)
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy

from peft import (
    get_peft_config,
    PeftModel,
    PeftConfig,
    get_peft_model,
    LoraConfig,
    TaskType,
)
import os

In [9]:
from torch.utils.data import Dataset
class InstructionDataSet(Dataset):
    def __init__(self, data, tokenizer, max_source_length, max_target_length):
        super(InstructionDataSet, self).__init__()
        #self.data = data.sample(len(data), random_state=0).reset_index(drop=True)
        self.data = data
        self.tokenizer = tokenizer
        self.max_source_length = max_source_length
        self.max_target_length = max_target_length
        # self.A_token = self.tokenizer.encode(text='A', add_special_tokens=False, truncation=True, )
        # self.B_token = self.tokenizer.encode(text='B', add_special_tokens=False, truncation=True, )
        # self.C_token = self.tokenizer.encode(text='C', add_special_tokens=False, truncation=True, )

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        now_data = self.data.loc[index]
        idx = now_data['id']
        r_a = now_data['instruction_a']
        r_b = now_data['instruction_b']
        
        templete_part1 = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nHere are two question-answering dialogues. Compare two model performance on answering question, determine which is better.\n\n"
        templete_part1_input_ids = self.tokenizer.encode(text=templete_part1, add_special_tokens=True,)
        
        model_a_input_ids = self.tokenizer.encode(text=r_a, add_special_tokens=True, truncation=True,
                                          max_length=self.max_source_length // 2)
        model_b_input_ids = self.tokenizer.encode(text=r_b, add_special_tokens=True, truncation=True,
                                          max_length=self.max_source_length // 2)
        templete_part2 = "###options\nA. Model A\nB. Model B\nC. Tie\n<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>"
        templete_part2_input_ids = self.tokenizer.encode(text=templete_part2, add_special_tokens=True,)
        
        input_ids = templete_part1_input_ids + model_a_input_ids + model_b_input_ids + templete_part2_input_ids

        return {
            "input_ids": input_ids,
            "id": idx
        }

In [10]:
from typing import Any, Callable, Dict, List, NewType, Optional, Tuple, Union
@dataclass
class DataCollatorForInstruction:
    tokenizer: PreTrainedTokenizerBase
    model: Optional[Any] = None
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    label_pad_token_id: int = -100
    return_tensors: str = "pt"

    def __call__(self, features, return_tensors=None):
        if return_tensors is None:
            return_tensors = self.return_tensors
        labels = [feature["labels"] for feature in features] if "labels" in features[0].keys() else None
        # We have to pad the labels before calling `tokenizer.pad` as this method won't pad them and needs them of the
        # same length to return tensors.

        features = self.tokenizer.pad(
            features,
            padding=True,
            max_length=MAX_LENGTH,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors=return_tensors,
        )

        # prepare decoder_input_ids
        if (
                labels is not None
                and self.model is not None
                and hasattr(self.model, "prepare_decoder_input_ids_from_labels")
        ):
            decoder_input_ids = self.model.prepare_decoder_input_ids_from_labels(labels=features["labels"])
            features["decoder_input_ids"] = decoder_input_ids
        # breakpoint() # [(len(features[i]['input_ids']),len(features[i]['labels'])) for i in range(4)]
        return features

In [11]:
test = pd.read_csv('dataset/random_instruction_valid.csv')#.sample(5).reset_index(drop = True)
test = test.loc[:100,:].reset_index(drop=True)
#sample_sub = pd.read_csv('/kaggle/input/lmsys-chatbot-arena/sample_submission.csv')

In [30]:
from tqdm import tqdm
def inference(model, test_dataloader):
    test_predictions = []
    for batch in tqdm(test_dataloader):
        for k in batch.keys():
            batch[k] = batch[k].to(device)
        with torch.no_grad():
            batch_input, idx = batch['input_ids'], batch['id']
            batch_input = {"input_ids": batch_input}
            outputs = model(**batch_input)
            #logits = outputs.logits.cpu().detach().numpy()
            response = model.generate(**batch_input, max_new_tokens=2, return_dict_in_generate=True, output_scores=True)
            #batch_input['input_ids'].shape[-1] + 1
            print(f"score {response.scores}")
            print(f"score is {response.scores[0]}")
            #print(f"response is {response}")
            #redict = np.exp(logits) / np.sum(np.exp(logits), axis=-1, keepdims=True)
            score = response.scores[0]
            A_prob, B_prob, C_prob = score[:,A_TOKEN_IDS], score[:,B_TOKEN_IDS], score[:,C_TOKEN_IDS]
            logits = torch.Tensor([[A_prob,B_prob,C_prob]])
            logits = torch.softmax(logits, dim=-1).cpu().numpy()
            print(f"logits is {logits}")
            node_result = [[idx[i],logits[i]] for i in range(batch_size)]

        test_predictions.append(node_result)
    return test_predictions

In [14]:
device = torch.device("cuda:0")

In [15]:
base_model = 'meta-llama/llama-3-transformers-8b-hf-v1'
model_path = "output/efficient-sponge-113/checkpoint-4"
MAX_LENGTH = 1500

In [16]:
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(base_model, trust_remote_code=True)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)
base_model_0 = AutoModelForCausalLM.from_pretrained(base_model,
                                                 config=config,
                                                 quantization_config=bnb_config,
                                                 torch_dtype=torch.bfloat16,
                                                 device_map="auto",
                                                 trust_remote_code=True)
base_model_0.config.pad_token_id = tokenizer.pad_token_id
base_model_0.resize_token_embeddings(len(tokenizer))
new_model = model_path
model0 = PeftModel.from_pretrained(base_model_0, new_model).to(device)
#model0 = model0.merge_and_unload()
model0.eval()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128257, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_la

In [17]:

A_TOKEN_IDS = tokenizer.encode('A',add_special_tokens=True, truncation=True,
                                          max_length=1024)
B_TOKEN_IDS = tokenizer.encode('B',add_special_tokens=True, truncation=True,
                                          max_length=1024)
C_TOKEN_IDS = tokenizer.encode('C',add_special_tokens=True, truncation=True,
                                          max_length=1024)

In [19]:
tokenized_dataset = InstructionDataSet(test, tokenizer,MAX_LENGTH, 1)
# test_dataloader = torch.utils.data.DataLoader(tokenized_dataset, batch_size = 2 ,shuffle=False, collate_fn=collate_fn)


data_collator = DataCollatorForInstruction(tokenizer=tokenizer)
batch_size = 1
test_dataloader = DataLoader(tokenized_dataset, batch_size=batch_size, shuffle=False, collate_fn=data_collator)

In [31]:
sub_pred = inference(model = model0, test_dataloader = test_dataloader)

  0%|          | 0/101 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 1/101 [00:00<00:48,  2.05it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.9062,  4.9688,  5.8750,  ..., -3.8750, -3.8750,  2.7500]],
       device='cuda:0'), tensor([[4.7500, 4.7188, 6.8125,  ..., 0.0806, 0.0806, 2.4062]],
       device='cuda:0'))
score is tensor([[ 3.9062,  4.9688,  5.8750,  ..., -3.8750, -3.8750,  2.7500]],
       device='cuda:0')
logits is [[0.3916697  0.50291383 0.10541646]]


  2%|▏         | 2/101 [00:00<00:43,  2.28it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 5.1562,  5.8438,  6.7188,  ..., -4.5938, -4.5938,  1.4844]],
       device='cuda:0'), tensor([[4.5938, 5.9688, 7.1562,  ..., 3.0312, 3.0312, 0.9922]],
       device='cuda:0'))
score is tensor([[ 5.1562,  5.8438,  6.7188,  ..., -4.5938, -4.5938,  1.4844]],
       device='cuda:0')
logits is [[0.5511481  0.252334   0.19651791]]


  3%|▎         | 3/101 [00:01<00:38,  2.51it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.6250,  5.2812,  5.5938,  ..., -1.7188, -1.7188,  3.7188]],
       device='cuda:0'), tensor([[3.8281, 4.7812, 6.4062,  ..., 0.1157, 0.1157, 2.3906]],
       device='cuda:0'))
score is tensor([[ 4.6250,  5.2812,  5.5938,  ..., -1.7188, -1.7188,  3.7188]],
       device='cuda:0')
logits is [[0.5685464  0.26856232 0.16289128]]


  4%|▍         | 4/101 [00:01<00:32,  2.99it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.1875,  5.3750,  6.3438,  ..., -4.9375, -4.9375,  3.2031]],
       device='cuda:0'), tensor([[ 3.8750,  5.5625,  7.4375,  ..., -1.1484, -1.1484,  2.8750]],
       device='cuda:0'))
score is tensor([[ 4.1875,  5.3750,  6.3438,  ..., -4.9375, -4.9375,  3.2031]],
       device='cuda:0')
logits is [[0.5139557  0.38795412 0.09809016]]


  5%|▍         | 5/101 [00:01<00:35,  2.69it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.2812,  4.3125,  6.4375,  ..., -5.9375, -5.9375,  1.4922]],
       device='cuda:0'), tensor([[ 4.0312,  4.8125,  7.1250,  ..., -4.0312, -4.0312,  2.5312]],
       device='cuda:0'))
score is tensor([[ 4.2812,  4.3125,  6.4375,  ..., -5.9375, -5.9375,  1.4922]],
       device='cuda:0')
logits is [[0.32584575 0.5542823  0.11987196]]


  6%|▌         | 6/101 [00:02<00:30,  3.12it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.1562,  5.2812,  6.0000,  ..., -5.8125, -5.8125,  1.8281]],
       device='cuda:0'), tensor([[3.3750, 4.4062, 6.6250,  ..., 2.3281, 2.3281, 2.9062]],
       device='cuda:0'))
score is tensor([[ 4.1562,  5.2812,  6.0000,  ..., -5.8125, -5.8125,  1.8281]],
       device='cuda:0')
logits is [[0.17135197 0.72141874 0.10722932]]


  7%|▋         | 7/101 [00:02<00:32,  2.87it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.8438,  5.7188,  7.1250,  ..., -1.0547, -1.0547,  2.2812]],
       device='cuda:0'), tensor([[4.4688, 5.4375, 7.7500,  ..., 1.0781, 1.0781, 3.2344]],
       device='cuda:0'))
score is tensor([[ 4.8438,  5.7188,  7.1250,  ..., -1.0547, -1.0547,  2.2812]],
       device='cuda:0')
logits is [[0.52931654 0.22065195 0.2500314 ]]


  8%|▊         | 8/101 [00:02<00:33,  2.74it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.6562,  4.8750,  5.9375,  ..., -3.7500, -3.7500,  3.4531]],
       device='cuda:0'), tensor([[ 4.5625,  4.8750,  7.4375,  ..., -2.3125, -2.3125,  3.5938]],
       device='cuda:0'))
score is tensor([[ 3.6562,  4.8750,  5.9375,  ..., -3.7500, -3.7500,  3.4531]],
       device='cuda:0')
logits is [[0.44207612 0.3901308  0.16779317]]


  9%|▉         | 9/101 [00:03<00:32,  2.86it/s]

score (tensor([[ 4.2812,  4.6875,  5.7188,  ..., -5.5312, -5.5312,  1.5859]],
       device='cuda:0'), tensor([[2.5000, 3.9688, 5.8750,  ..., 2.0781, 2.0781, 1.7109]],
       device='cuda:0'))
score is tensor([[ 4.2812,  4.6875,  5.7188,  ..., -5.5312, -5.5312,  1.5859]],
       device='cuda:0')
logits is [[0.3617317  0.5963948  0.04187349]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 10%|▉         | 10/101 [00:03<00:37,  2.40it/s]

score (tensor([[4.1875, 5.8125, 6.9688,  ..., 2.7031, 2.7031, 2.7656]],
       device='cuda:0'), tensor([[4.0938, 4.8438, 6.4375,  ..., 1.6484, 1.6484, 2.8594]],
       device='cuda:0'))
score is tensor([[4.1875, 5.8125, 6.9688,  ..., 2.7031, 2.7031, 2.7656]],
       device='cuda:0')
logits is [[0.6913343  0.15915419 0.14951153]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 11%|█         | 11/101 [00:04<00:47,  1.88it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.7188,  6.3125,  7.6562,  ..., -5.6562, -5.6562,  0.9883]],
       device='cuda:0'), tensor([[ 4.3125,  6.2500,  8.6875,  ..., -0.1523, -0.1523,  1.3281]],
       device='cuda:0'))
score is tensor([[ 4.7188,  6.3125,  7.6562,  ..., -5.6562, -5.6562,  0.9883]],
       device='cuda:0')
logits is [[0.6044545  0.17317912 0.2223664 ]]


 12%|█▏        | 12/101 [00:04<00:40,  2.22it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.7500,  5.6562,  6.4688,  ..., -4.2500, -4.2500,  3.0469]],
       device='cuda:0'), tensor([[ 3.7031,  6.0000,  7.4375,  ..., -0.4512, -0.4531,  1.8516]],
       device='cuda:0'))
score is tensor([[ 3.7500,  5.6562,  6.4688,  ..., -4.2500, -4.2500,  3.0469]],
       device='cuda:0')
logits is [[0.5487513  0.26744002 0.18380865]]


 13%|█▎        | 13/101 [00:05<00:38,  2.29it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[4.5000, 6.0000, 7.0312,  ..., 1.7109, 1.7109, 3.1250]],
       device='cuda:0'), tensor([[4.0625, 5.5625, 7.5312,  ..., 1.8906, 1.8906, 2.5156]],
       device='cuda:0'))
score is tensor([[4.5000, 6.0000, 7.0312,  ..., 1.7109, 1.7109, 3.1250]],
       device='cuda:0')
logits is [[0.6874402  0.1440953  0.16846447]]


 14%|█▍        | 14/101 [00:05<00:32,  2.65it/s]

score (tensor([[ 5.5938,  6.0625,  7.1562,  ..., -4.5625, -4.5625,  2.9844]],
       device='cuda:0'), tensor([[7.8438, 6.8125, 8.0625,  ..., 0.4121, 0.4121, 3.0625]],
       device='cuda:0'))
score is tensor([[ 5.5938,  6.0625,  7.1562,  ..., -4.5625, -4.5625,  2.9844]],
       device='cuda:0')
logits is [[0.45278412 0.4819862  0.06522974]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 15%|█▍        | 15/101 [00:06<00:38,  2.21it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.6875,  5.4062,  6.3438,  ..., -4.6562, -4.6562,  2.9375]],
       device='cuda:0'), tensor([[ 6.1250,  5.1250,  7.2188,  ..., -0.1680, -0.1680,  3.3750]],
       device='cuda:0'))
score is tensor([[ 4.6875,  5.4062,  6.3438,  ..., -4.6562, -4.6562,  2.9375]],
       device='cuda:0')
logits is [[0.43734327 0.38595408 0.17670265]]


 16%|█▌        | 16/101 [00:06<00:34,  2.45it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 6.0938,  5.9375,  7.7500,  ..., -4.1250, -4.1250,  2.4219]],
       device='cuda:0'), tensor([[5.9062, 6.1875, 8.4375,  ..., 0.1270, 0.1270, 2.9531]],
       device='cuda:0'))
score is tensor([[ 6.0938,  5.9375,  7.7500,  ..., -4.1250, -4.1250,  2.4219]],
       device='cuda:0')
logits is [[0.44925287 0.42203403 0.1287131 ]]


 17%|█▋        | 17/101 [00:06<00:29,  2.81it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.6875,  5.5000,  7.8438,  ..., -0.3359, -0.3359,  4.1562]],
       device='cuda:0'), tensor([[4.4375, 6.0625, 8.0625,  ..., 0.6875, 0.6875, 3.2344]],
       device='cuda:0'))
score is tensor([[ 4.6875,  5.5000,  7.8438,  ..., -0.3359, -0.3359,  4.1562]],
       device='cuda:0')
logits is [[0.52931654 0.22065195 0.2500314 ]]


 18%|█▊        | 18/101 [00:07<00:31,  2.63it/s]

score (tensor([[ 4.4375,  5.8125,  7.3750,  ..., -3.3906, -3.3906,  2.8125]],
       device='cuda:0'), tensor([[ 4.6562,  5.2812,  7.6875,  ..., -0.9570, -0.9570,  2.3750]],
       device='cuda:0'))
score is tensor([[ 4.4375,  5.8125,  7.3750,  ..., -3.3906, -3.3906,  2.8125]],
       device='cuda:0')
logits is [[0.5254472  0.28125164 0.19330123]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 19%|█▉        | 19/101 [00:07<00:34,  2.41it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.1250,  5.8750,  6.8438,  ..., -2.8438, -2.8438,  2.1562]],
       device='cuda:0'), tensor([[ 4.4062,  5.9688,  7.9688,  ..., -0.6367, -0.6367,  2.0625]],
       device='cuda:0'))
score is tensor([[ 4.1250,  5.8750,  6.8438,  ..., -2.8438, -2.8438,  2.1562]],
       device='cuda:0')
logits is [[0.5872551  0.19670583 0.21603909]]


 20%|█▉        | 20/101 [00:07<00:29,  2.72it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.7188,  6.0312,  6.9375,  ..., -5.7188, -5.7188,  1.7812]],
       device='cuda:0'), tensor([[4.4688, 6.5938, 7.9688,  ..., 0.0605, 0.0605, 2.1406]],
       device='cuda:0'))
score is tensor([[ 4.7188,  6.0312,  6.9375,  ..., -5.7188, -5.7188,  1.7812]],
       device='cuda:0')
logits is [[0.36457288 0.34248453 0.29294255]]


 21%|██        | 21/101 [00:08<00:27,  2.86it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.9062,  5.2500,  6.5000,  ..., -2.2031, -2.2031,  4.1250]],
       device='cuda:0'), tensor([[4.3750, 5.0312, 6.5938,  ..., 0.8750, 0.8750, 2.2812]],
       device='cuda:0'))
score is tensor([[ 4.9062,  5.2500,  6.5000,  ..., -2.2031, -2.2031,  4.1250]],
       device='cuda:0')
logits is [[0.57202786 0.20396282 0.22400932]]


 22%|██▏       | 22/101 [00:08<00:27,  2.82it/s]

score (tensor([[ 4.9062,  5.8438,  6.9688,  ..., -4.0625, -4.0625,  1.3594]],
       device='cuda:0'), tensor([[4.1250, 5.4375, 7.5312,  ..., 1.6250, 1.6250, 2.2031]],
       device='cuda:0'))
score is tensor([[ 4.9062,  5.8438,  6.9688,  ..., -4.0625, -4.0625,  1.3594]],
       device='cuda:0')
logits is [[0.5137218  0.22796267 0.25831556]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 23%|██▎       | 23/101 [00:09<00:37,  2.06it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.1562,  5.5938,  6.4062,  ..., -2.8750, -2.8750,  2.4375]],
       device='cuda:0'), tensor([[ 5.4062,  6.6250,  7.8125,  ..., -0.0718, -0.0718,  1.9141]],
       device='cuda:0'))
score is tensor([[ 4.1562,  5.5938,  6.4062,  ..., -2.8750, -2.8750,  2.4375]],
       device='cuda:0')
logits is [[0.35040897 0.44993404 0.19965702]]


 24%|██▍       | 24/101 [00:09<00:35,  2.18it/s]

score (tensor([[ 4.6250,  5.6562,  6.3750,  ..., -5.3125, -5.3125,  1.7188]],
       device='cuda:0'), tensor([[4.0938, 5.5000, 7.1250,  ..., 2.2812, 2.2812, 2.7812]],
       device='cuda:0'))
score is tensor([[ 4.6250,  5.6562,  6.3750,  ..., -5.3125, -5.3125,  1.7188]],
       device='cuda:0')
logits is [[0.33613393 0.30605343 0.3578127 ]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 25%|██▍       | 25/101 [00:10<00:38,  1.96it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.4062,  6.9062,  6.9375,  ..., -4.5938, -4.5938,  1.7109]],
       device='cuda:0'), tensor([[4.5938, 6.4062, 8.3750,  ..., 1.2969, 1.2969, 1.9766]],
       device='cuda:0'))
score is tensor([[ 4.4062,  6.9062,  6.9375,  ..., -4.5938, -4.5938,  1.7109]],
       device='cuda:0')
logits is [[0.47540227 0.3069428  0.21765502]]


 26%|██▌       | 26/101 [00:10<00:34,  2.17it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.3438,  5.8125,  6.9375,  ..., -2.8906, -2.8906,  2.5312]],
       device='cuda:0'), tensor([[4.3125, 6.0312, 7.8125,  ..., 1.1172, 1.1172, 2.2969]],
       device='cuda:0'))
score is tensor([[ 4.3438,  5.8125,  6.9375,  ..., -2.8906, -2.8906,  2.5312]],
       device='cuda:0')
logits is [[0.6697112  0.22432515 0.10596369]]


 27%|██▋       | 27/101 [00:11<00:32,  2.27it/s]

score (tensor([[4.1875, 7.0312, 6.8438,  ..., 0.1826, 0.1826, 4.2812]],
       device='cuda:0'), tensor([[4.2188, 6.8125, 6.8125,  ..., 1.6562, 1.6562, 3.2344]],
       device='cuda:0'))
score is tensor([[4.1875, 7.0312, 6.8438,  ..., 0.1826, 0.1826, 4.2812]],
       device='cuda:0')
logits is [[0.69703925 0.18183357 0.12112726]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 28%|██▊       | 28/101 [00:11<00:40,  1.82it/s]

score (tensor([[ 6.3125,  6.3438,  9.4375,  ..., -3.1719, -3.1719,  2.0000]],
       device='cuda:0'), tensor([[6.4062e+00, 6.5938e+00, 1.0438e+01,  ..., 7.5378e-03, 7.5378e-03,
         3.3984e-01]], device='cuda:0'))
score is tensor([[ 6.3125,  6.3438,  9.4375,  ..., -3.1719, -3.1719,  2.0000]],
       device='cuda:0')
logits is [[0.72671056 0.1837412  0.08954831]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 29%|██▊       | 29/101 [00:12<00:44,  1.60it/s]

score (tensor([[ 4.1875,  5.3125,  6.9375,  ..., -4.5312, -4.5312,  0.8555]],
       device='cuda:0'), tensor([[ 5.7188,  5.9375,  9.5625,  ..., -0.0977, -0.0977,  1.7578]],
       device='cuda:0'))
score is tensor([[ 4.1875,  5.3125,  6.9375,  ..., -4.5312, -4.5312,  0.8555]],
       device='cuda:0')
logits is [[0.4567359  0.33415514 0.20910893]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 30%|██▉       | 30/101 [00:13<00:47,  1.48it/s]

score (tensor([[ 4.2500,  5.3125,  6.6875,  ..., -4.3125, -4.3125,  1.4219]],
       device='cuda:0'), tensor([[5.1250, 5.9375, 9.1875,  ..., 0.1279, 0.1279, 1.6406]],
       device='cuda:0'))
score is tensor([[ 4.2500,  5.3125,  6.6875,  ..., -4.3125, -4.3125,  1.4219]],
       device='cuda:0')
logits is [[0.43666196 0.32960975 0.23372832]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 31%|███       | 31/101 [00:14<00:44,  1.56it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.3438,  5.1250,  5.7500,  ..., -3.1562, -3.1562,  1.5781]],
       device='cuda:0'), tensor([[ 4.1562,  3.3750,  3.8281,  ..., -3.9844, -3.9844, -0.4473]],
       device='cuda:0'))
score is tensor([[ 4.3438,  5.1250,  5.7500,  ..., -3.1562, -3.1562,  1.5781]],
       device='cuda:0')
logits is [[0.5926962  0.23210315 0.17520066]]


 32%|███▏      | 32/101 [00:14<00:37,  1.84it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.9688,  5.0312,  5.6875,  ..., -4.6250, -4.6250,  1.9453]],
       device='cuda:0'), tensor([[ 4.4688,  5.3125,  6.8750,  ..., -3.0156, -3.0156,  1.6875]],
       device='cuda:0'))
score is tensor([[ 3.9688,  5.0312,  5.6875,  ..., -4.6250, -4.6250,  1.9453]],
       device='cuda:0')
logits is [[0.28229594 0.5111719  0.2065321 ]]


 33%|███▎      | 33/101 [00:14<00:34,  1.95it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.2812,  5.4375,  6.6562,  ..., -0.5664, -0.5664,  2.6875]],
       device='cuda:0'), tensor([[4.2500, 4.9375, 6.6562,  ..., 0.6094, 0.6055, 3.2812]],
       device='cuda:0'))
score is tensor([[ 4.2812,  5.4375,  6.6562,  ..., -0.5664, -0.5664,  2.6875]],
       device='cuda:0')
logits is [[0.6170101  0.22698534 0.1560046 ]]


 34%|███▎      | 34/101 [00:15<00:30,  2.22it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[3.9844, 5.6875, 6.2812,  ..., 1.8203, 1.8203, 3.4062]],
       device='cuda:0'), tensor([[ 5.1250,  6.4062,  8.0000,  ...,  2.7188,  2.7188, -0.2334]],
       device='cuda:0'))
score is tensor([[3.9844, 5.6875, 6.2812,  ..., 1.8203, 1.8203, 3.4062]],
       device='cuda:0')
logits is [[0.80913955 0.08798984 0.10287055]]


 35%|███▍      | 35/101 [00:15<00:28,  2.36it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.8438,  4.9688,  6.5000,  ..., -5.9375, -5.9375,  2.7031]],
       device='cuda:0'), tensor([[ 4.0625,  4.5000,  7.0938,  ..., -4.1250, -4.1250,  2.3594]],
       device='cuda:0'))
score is tensor([[ 4.8438,  4.9688,  6.5000,  ..., -5.9375, -5.9375,  2.7031]],
       device='cuda:0')
logits is [[0.2325153  0.6125962  0.15488859]]


 36%|███▌      | 36/101 [00:15<00:27,  2.39it/s]

score (tensor([[ 3.9062,  4.8438,  5.2500,  ..., -5.5000, -5.5000,  1.2969]],
       device='cuda:0'), tensor([[3.5312, 4.2500, 6.6562,  ..., 2.3594, 2.3594, 2.9688]],
       device='cuda:0'))
score is tensor([[ 3.9062,  4.8438,  5.2500,  ..., -5.5000, -5.5000,  1.2969]],
       device='cuda:0')
logits is [[0.3867294  0.46648473 0.14678593]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 37%|███▋      | 37/101 [00:16<00:30,  2.13it/s]

score (tensor([[ 4.2188,  5.7500,  6.5000,  ..., -3.6406, -3.6406,  2.6406]],
       device='cuda:0'), tensor([[ 4.5312,  6.0938,  7.8438,  ..., -2.5000, -2.5000,  2.0625]],
       device='cuda:0'))
score is tensor([[ 4.2188,  5.7500,  6.5000,  ..., -3.6406, -3.6406,  2.6406]],
       device='cuda:0')
logits is [[0.28413245 0.41341025 0.30245742]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 38%|███▊      | 38/101 [00:17<00:32,  1.93it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.3750,  4.9375,  6.0938,  ..., -4.2812, -4.2812,  3.7344]],
       device='cuda:0'), tensor([[ 4.3438,  5.4375,  7.5000,  ..., -3.3750, -3.3750,  2.3594]],
       device='cuda:0'))
score is tensor([[ 4.3750,  4.9375,  6.0938,  ..., -4.2812, -4.2812,  3.7344]],
       device='cuda:0')
logits is [[0.47407696 0.28754222 0.23838086]]


 39%|███▊      | 39/101 [00:17<00:28,  2.20it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.8438,  5.6875,  6.4375,  ..., -5.3750, -5.3750,  1.7578]],
       device='cuda:0'), tensor([[3.7812, 5.5938, 7.2188,  ..., 1.9297, 1.9297, 2.1094]],
       device='cuda:0'))
score is tensor([[ 4.8438,  5.6875,  6.4375,  ..., -5.3750, -5.3750,  1.7578]],
       device='cuda:0')
logits is [[0.28178298 0.28178298 0.43643403]]


 40%|███▉      | 40/101 [00:17<00:25,  2.43it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 5.0625,  5.0312,  7.6250,  ..., -2.2344, -2.2344,  4.5312]],
       device='cuda:0'), tensor([[ 4.3125,  5.2812,  7.6250,  ..., -0.1216, -0.1216,  4.3750]],
       device='cuda:0'))
score is tensor([[ 5.0625,  5.0312,  7.6250,  ..., -2.2344, -2.2344,  4.5312]],
       device='cuda:0')
logits is [[0.6273204  0.22367798 0.14900166]]


 41%|████      | 41/101 [00:17<00:21,  2.80it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.9688,  4.8750,  6.3750,  ..., -1.4844, -1.4844,  3.8906]],
       device='cuda:0'), tensor([[ 3.9219,  5.2188,  7.3125,  ..., -2.1250, -2.1250,  2.6406]],
       device='cuda:0'))
score is tensor([[ 3.9688,  4.8750,  6.3750,  ..., -1.4844, -1.4844,  3.8906]],
       device='cuda:0')
logits is [[0.49407455 0.37294704 0.13297836]]


 42%|████▏     | 42/101 [00:18<00:22,  2.59it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.7188,  6.3438,  6.7188,  ..., -5.5938, -5.5938,  1.3359]],
       device='cuda:0'), tensor([[4.1875, 5.3125, 7.3438,  ..., 2.4219, 2.4219, 0.0737]],
       device='cuda:0'))
score is tensor([[ 4.7188,  6.3438,  6.7188,  ..., -5.5938, -5.5938,  1.3359]],
       device='cuda:0')
logits is [[0.23823412 0.36898422 0.39278165]]


 43%|████▎     | 43/101 [00:18<00:22,  2.56it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.1562,  6.2188,  6.0938,  ..., -5.0312, -5.0312,  2.7188]],
       device='cuda:0'), tensor([[ 4.8125,  6.8125,  8.1250,  ..., -0.6250, -0.6250,  2.0156]],
       device='cuda:0'))
score is tensor([[ 4.1562,  6.2188,  6.0938,  ..., -5.0312, -5.0312,  2.7188]],
       device='cuda:0')
logits is [[0.2909895  0.5269139  0.18209657]]


 44%|████▎     | 44/101 [00:19<00:21,  2.61it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.4688,  5.6562,  6.4062,  ..., -4.9375, -4.9375,  2.7344]],
       device='cuda:0'), tensor([[4.1562, 5.6875, 7.5938,  ..., 0.9570, 0.9570, 3.4219]],
       device='cuda:0'))
score is tensor([[ 4.4688,  5.6562,  6.4062,  ..., -4.9375, -4.9375,  2.7344]],
       device='cuda:0')
logits is [[0.4513794  0.29143244 0.25718823]]


 45%|████▍     | 45/101 [00:19<00:20,  2.78it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.9062,  5.1250,  7.5000,  ..., -4.2500, -4.2500,  3.9688]],
       device='cuda:0'), tensor([[4.8125, 5.4062, 7.2500,  ..., 0.5117, 0.5117, 2.9062]],
       device='cuda:0'))
score is tensor([[ 4.9062,  5.1250,  7.5000,  ..., -4.2500, -4.2500,  3.9688]],
       device='cuda:0')
logits is [[0.53732723 0.36929926 0.09337347]]


 46%|████▌     | 46/101 [00:19<00:19,  2.77it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 5.0625,  5.7500,  7.4688,  ..., -2.8125, -2.8125,  2.4844]],
       device='cuda:0'), tensor([[4.6562, 6.3125, 7.8438,  ..., 1.5547, 1.5547, 2.7031]],
       device='cuda:0'))
score is tensor([[ 5.0625,  5.7500,  7.4688,  ..., -2.8125, -2.8125,  2.4844]],
       device='cuda:0')
logits is [[0.53997904 0.25506803 0.20495293]]


 47%|████▋     | 47/101 [00:20<00:20,  2.61it/s]

score (tensor([[ 3.6094,  5.2188,  6.4375,  ..., -3.8438, -3.8438,  1.8828]],
       device='cuda:0'), tensor([[4.1875, 5.5938, 7.8125,  ..., 1.3516, 1.3516, 2.7188]],
       device='cuda:0'))
score is tensor([[ 3.6094,  5.2188,  6.4375,  ..., -3.8438, -3.8438,  1.8828]],
       device='cuda:0')
logits is [[0.52224916 0.3371894  0.14056146]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 48%|████▊     | 48/101 [00:20<00:22,  2.40it/s]

score (tensor([[ 4.5625,  4.7188,  5.5938,  ..., -3.9844, -3.9844,  1.7578]],
       device='cuda:0'), tensor([[6.2188, 4.5312, 6.0312,  ..., 2.8594, 2.8594, 4.2188]],
       device='cuda:0'))
score is tensor([[ 4.5625,  4.7188,  5.5938,  ..., -3.9844, -3.9844,  1.7578]],
       device='cuda:0')
logits is [[0.1771256  0.58077246 0.24210198]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 49%|████▊     | 49/101 [00:21<00:27,  1.89it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[4.5938, 6.6875, 7.7188,  ..., 1.3828, 1.3828, 1.3125]],
       device='cuda:0'), tensor([[ 4.9375,  6.9062,  7.4375,  ...,  4.2812,  4.2812, -0.9141]],
       device='cuda:0'))
score is tensor([[4.5938, 6.6875, 7.7188,  ..., 1.3828, 1.3828, 1.3125]],
       device='cuda:0')
logits is [[0.58395475 0.24342856 0.17261669]]


 50%|████▉     | 50/101 [00:22<00:25,  1.99it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.3750,  4.7188,  6.6250,  ..., -5.3438, -5.3438,  2.3750]],
       device='cuda:0'), tensor([[ 4.6562,  5.1562,  7.5938,  ..., -0.6484, -0.6484,  3.3125]],
       device='cuda:0'))
score is tensor([[ 4.3750,  4.7188,  6.6250,  ..., -5.3438, -5.3438,  2.3750]],
       device='cuda:0')
logits is [[0.34757978 0.4604684  0.19195178]]


 50%|█████     | 51/101 [00:22<00:23,  2.17it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.8438,  5.8750,  7.2500,  ..., -4.6875, -4.6875,  2.5000]],
       device='cuda:0'), tensor([[5.4062, 5.3125, 7.5938,  ..., 1.3906, 1.3906, 2.9531]],
       device='cuda:0'))
score is tensor([[ 4.8438,  5.8750,  7.2500,  ..., -4.6875, -4.6875,  2.5000]],
       device='cuda:0')
logits is [[0.37936556 0.4298775  0.19075698]]


 51%|█████▏    | 52/101 [00:22<00:20,  2.41it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.5938,  6.0938,  6.3125,  ..., -4.2188, -4.2188,  1.9609]],
       device='cuda:0'), tensor([[ 4.1250,  6.2188,  7.2812,  ..., -2.0000, -2.0000,  1.8906]],
       device='cuda:0'))
score is tensor([[ 4.5938,  6.0938,  6.3125,  ..., -4.2188, -4.2188,  1.9609]],
       device='cuda:0')
logits is [[0.31304014 0.2021139  0.48484603]]


 52%|█████▏    | 53/101 [00:23<00:20,  2.37it/s]

score (tensor([[ 3.9062,  4.8750,  6.1875,  ..., -3.3125, -3.3125,  3.3438]],
       device='cuda:0'), tensor([[ 4.6875,  4.7188,  7.0625,  ..., -0.2178, -0.2178,  4.6562]],
       device='cuda:0'))
score is tensor([[ 3.9062,  4.8750,  6.1875,  ..., -3.3125, -3.3125,  3.3438]],
       device='cuda:0')
logits is [[0.40086764 0.42672136 0.17241101]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 53%|█████▎    | 54/101 [00:23<00:22,  2.13it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.5469,  5.4688,  5.4375,  ..., -2.7500, -2.7500,  3.1875]],
       device='cuda:0'), tensor([[ 3.9375,  5.7188,  7.0625,  ..., -0.4375, -0.4375,  2.2500]],
       device='cuda:0'))
score is tensor([[ 3.5469,  5.4688,  5.4375,  ..., -2.7500, -2.7500,  3.1875]],
       device='cuda:0')
logits is [[0.28413245 0.41341025 0.30245742]]


 54%|█████▍    | 55/101 [00:23<00:18,  2.53it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.2188,  5.4375,  7.2500,  ..., -1.6016, -1.6016,  3.9688]],
       device='cuda:0'), tensor([[3.7188, 5.7500, 7.9062,  ..., 0.0271, 0.0271, 3.0781]],
       device='cuda:0'))
score is tensor([[ 4.2188,  5.4375,  7.2500,  ..., -1.6016, -1.6016,  3.9688]],
       device='cuda:0')
logits is [[0.5698827  0.24510351 0.18501382]]


 55%|█████▌    | 56/101 [00:24<00:18,  2.44it/s]

score (tensor([[ 3.6406,  4.8750,  6.5000,  ..., -4.0625, -4.0625,  2.5625]],
       device='cuda:0'), tensor([[3.8750, 5.3750, 7.1250,  ..., 1.5000, 1.5000, 2.8906]],
       device='cuda:0'))
score is tensor([[ 3.6406,  4.8750,  6.5000,  ..., -4.0625, -4.0625,  2.5625]],
       device='cuda:0')
logits is [[0.49052298 0.40665781 0.1028192 ]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 56%|█████▋    | 57/101 [00:25<00:23,  1.89it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.4688,  3.0625,  3.9062,  ..., -0.0859, -0.0859,  1.5078]],
       device='cuda:0'), tensor([[ 3.3438,  4.0000,  6.2188,  ...,  1.8047,  1.8047, -0.3848]],
       device='cuda:0'))
score is tensor([[ 4.4688,  3.0625,  3.9062,  ..., -0.0859, -0.0859,  1.5078]],
       device='cuda:0')
logits is [[0.77691764 0.07936713 0.14371532]]


 57%|█████▋    | 58/101 [00:25<00:20,  2.05it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[4.3438, 6.4688, 6.9375,  ..., 0.9297, 0.9297, 3.0781]],
       device='cuda:0'), tensor([[3.7812, 5.1250, 6.4375,  ..., 0.1895, 0.1895, 2.6562]],
       device='cuda:0'))
score is tensor([[4.3438, 6.4688, 6.9375,  ..., 0.9297, 0.9297, 3.0781]],
       device='cuda:0')
logits is [[0.5770246  0.3287787  0.09419668]]


 58%|█████▊    | 59/101 [00:25<00:18,  2.25it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.1875,  5.0625,  5.8750,  ..., -0.0593, -0.0593,  4.1562]],
       device='cuda:0'), tensor([[4.0312, 4.5625, 6.6250,  ..., 0.6328, 0.6328, 3.6406]],
       device='cuda:0'))
score is tensor([[ 4.1875,  5.0625,  5.8750,  ..., -0.0593, -0.0593,  4.1562]],
       device='cuda:0')
logits is [[0.49052298 0.40665781 0.1028192 ]]


 59%|█████▉    | 60/101 [00:26<00:16,  2.49it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[3.8438, 4.4688, 6.5000,  ..., 1.4141, 1.4141, 4.0000]],
       device='cuda:0'), tensor([[3.9844, 4.8125, 7.2188,  ..., 1.9141, 1.9141, 4.0625]],
       device='cuda:0'))
score is tensor([[3.8438, 4.4688, 6.5000,  ..., 1.4141, 1.4141, 4.0000]],
       device='cuda:0')
logits is [[0.8094406  0.09081672 0.09974265]]


 60%|██████    | 61/101 [00:26<00:15,  2.51it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.5000,  6.1875,  6.8125,  ..., -4.9375, -4.9375,  1.9062]],
       device='cuda:0'), tensor([[ 4.9062,  7.4688,  8.5000,  ..., -2.9688, -2.9688,  1.7031]],
       device='cuda:0'))
score is tensor([[ 4.5000,  6.1875,  6.8125,  ..., -4.9375, -4.9375,  1.9062]],
       device='cuda:0')
logits is [[0.10537178 0.7314251  0.163203  ]]


 61%|██████▏   | 62/101 [00:26<00:14,  2.70it/s]

score (tensor([[ 4.3125,  5.0000,  7.9062,  ..., -1.1016, -1.1016,  4.6562]],
       device='cuda:0'), tensor([[4.4375, 5.5938, 7.9688,  ..., 0.4922, 0.4922, 4.3750]],
       device='cuda:0'))
score is tensor([[ 4.3125,  5.0000,  7.9062,  ..., -1.1016, -1.1016,  4.6562]],
       device='cuda:0')
logits is [[0.7275436  0.15734234 0.11511411]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 62%|██████▏   | 63/101 [00:27<00:16,  2.34it/s]

score (tensor([[ 4.2188,  4.7500,  6.0000,  ..., -4.1875, -4.2188,  3.3281]],
       device='cuda:0'), tensor([[ 5.0000,  5.2188,  7.3125,  ..., -1.3750, -1.3750,  3.2812]],
       device='cuda:0'))
score is tensor([[ 4.2188,  4.7500,  6.0000,  ..., -4.1875, -4.2188,  3.3281]],
       device='cuda:0')
logits is [[0.32058144 0.45209226 0.22732626]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 63%|██████▎   | 64/101 [00:27<00:16,  2.23it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.0312,  5.8750,  6.8750,  ..., -3.8594, -3.8594,  3.1719]],
       device='cuda:0'), tensor([[4.0938, 5.8750, 7.7500,  ..., 0.8047, 0.8047, 2.0156]],
       device='cuda:0'))
score is tensor([[ 4.0312,  5.8750,  6.8750,  ..., -3.8594, -3.8594,  3.1719]],
       device='cuda:0')
logits is [[0.63184255 0.17545633 0.1927011 ]]


 64%|██████▍   | 65/101 [00:28<00:15,  2.31it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.9375,  4.4688,  6.7188,  ..., -3.4375, -3.4219,  2.2344]],
       device='cuda:0'), tensor([[3.3438, 4.3750, 6.7188,  ..., 2.0625, 2.0625, 1.9297]],
       device='cuda:0'))
score is tensor([[ 3.9375,  4.4688,  6.7188,  ..., -3.4375, -3.4219,  2.2344]],
       device='cuda:0')
logits is [[0.42231882 0.42231882 0.15536241]]


 65%|██████▌   | 66/101 [00:28<00:14,  2.45it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.7188,  5.3438,  7.3750,  ..., -3.8125, -3.8125,  2.1875]],
       device='cuda:0'), tensor([[4.5625, 5.6875, 7.6875,  ..., 1.1406, 1.1406, 3.0312]],
       device='cuda:0'))
score is tensor([[ 4.7188,  5.3438,  7.3750,  ..., -3.8125, -3.8125,  2.1875]],
       device='cuda:0')
logits is [[0.41994193 0.34814408 0.23191397]]


 66%|██████▋   | 67/101 [00:29<00:13,  2.55it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.7656,  5.3125,  6.3438,  ..., -5.2188, -5.2188,  1.7578]],
       device='cuda:0'), tensor([[3.7812, 4.0312, 7.4062,  ..., 2.3906, 2.3906, 2.2344]],
       device='cuda:0'))
score is tensor([[ 3.7656,  5.3125,  6.3438,  ..., -5.2188, -5.2188,  1.7578]],
       device='cuda:0')
logits is [[0.18443936 0.47098243 0.34457812]]


 67%|██████▋   | 68/101 [00:29<00:13,  2.47it/s]

score (tensor([[4.3438, 5.6250, 7.5625,  ..., 1.0156, 1.0156, 3.1250]],
       device='cuda:0'), tensor([[4.6875, 6.2500, 8.1250,  ..., 0.6055, 0.6055, 1.4688]],
       device='cuda:0'))
score is tensor([[4.3438, 5.6250, 7.5625,  ..., 1.0156, 1.0156, 3.1250]],
       device='cuda:0')
logits is [[0.62452495 0.24456748 0.13090754]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 68%|██████▊   | 69/101 [00:30<00:13,  2.33it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.8594,  5.5000,  6.5938,  ..., -2.7969, -2.7969,  2.8438]],
       device='cuda:0'), tensor([[4.3125, 5.4688, 7.7500,  ..., 0.1680, 0.1680, 2.6719]],
       device='cuda:0'))
score is tensor([[ 3.8594,  5.5000,  6.5938,  ..., -2.7969, -2.7969,  2.8438]],
       device='cuda:0')
logits is [[0.3846515  0.4094594  0.20588912]]


 69%|██████▉   | 70/101 [00:30<00:13,  2.30it/s]

score (tensor([[ 4.1562,  5.8438,  6.3750,  ..., -0.5391, -0.5391,  2.6406]],
       device='cuda:0'), tensor([[4.3438, 5.5312, 7.3750,  ..., 0.0630, 0.0630, 2.5781]],
       device='cuda:0'))
score is tensor([[ 4.1562,  5.8438,  6.3750,  ..., -0.5391, -0.5391,  2.6406]],
       device='cuda:0')
logits is [[0.5472004  0.27514964 0.17764995]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 70%|███████   | 71/101 [00:30<00:13,  2.21it/s]

score (tensor([[ 3.3125,  4.3125,  6.2812,  ..., -2.0469, -2.0469,  0.5820]],
       device='cuda:0'), tensor([[3.7812, 4.7500, 6.8125,  ..., 0.9609, 0.9609, 1.5938]],
       device='cuda:0'))
score is tensor([[ 3.3125,  4.3125,  6.2812,  ..., -2.0469, -2.0469,  0.5820]],
       device='cuda:0')
logits is [[0.506632   0.2886702  0.20469782]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 71%|███████▏  | 72/101 [00:31<00:13,  2.14it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.2031,  3.3125,  7.1875,  ..., -1.0547, -1.0547,  1.6016]],
       device='cuda:0'), tensor([[4.0938, 4.2500, 6.7500,  ..., 0.9609, 0.9609, 1.7031]],
       device='cuda:0'))
score is tensor([[ 3.2031,  3.3125,  7.1875,  ..., -1.0547, -1.0547,  1.6016]],
       device='cuda:0')
logits is [[0.5322074  0.25937718 0.20841542]]


 72%|███████▏  | 73/101 [00:31<00:12,  2.25it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.5625,  7.7500,  7.5312,  ..., -1.2422, -1.2422,  1.2188]],
       device='cuda:0'), tensor([[ 4.9688,  7.7188,  9.3750,  ..., -0.5859, -0.5859,  0.3457]],
       device='cuda:0'))
score is tensor([[ 4.5625,  7.7500,  7.5312,  ..., -1.2422, -1.2422,  1.2188]],
       device='cuda:0')
logits is [[0.65440184 0.14601679 0.1995813 ]]


 73%|███████▎  | 74/101 [00:32<00:11,  2.39it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.6875,  5.4375,  6.7812,  ..., -4.8750, -4.8750,  2.2500]],
       device='cuda:0'), tensor([[4.2188, 5.3750, 7.2188,  ..., 1.7266, 1.7266, 3.7188]],
       device='cuda:0'))
score is tensor([[ 4.6875,  5.4375,  6.7812,  ..., -4.8750, -4.8750,  2.2500]],
       device='cuda:0')
logits is [[0.43340844 0.40714955 0.15944205]]


 74%|███████▍  | 75/101 [00:32<00:10,  2.41it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.8125,  6.1250,  5.8750,  ..., -4.0938, -4.0938,  3.0000]],
       device='cuda:0'), tensor([[4.4062, 6.7812, 7.0938,  ..., 0.9297, 0.9297, 3.0156]],
       device='cuda:0'))
score is tensor([[ 3.8125,  6.1250,  5.8750,  ..., -4.0938, -4.0938,  3.0000]],
       device='cuda:0')
logits is [[0.31692097 0.4908568  0.19222228]]


 75%|███████▌  | 76/101 [00:32<00:09,  2.54it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 5.0000,  5.3438,  7.5938,  ..., -4.7188, -4.7188,  2.6562]],
       device='cuda:0'), tensor([[4.8125, 5.7812, 8.1875,  ..., 1.6953, 1.6953, 3.6562]],
       device='cuda:0'))
score is tensor([[ 5.0000,  5.3438,  7.5938,  ..., -4.7188, -4.7188,  2.6562]],
       device='cuda:0')
logits is [[0.37708744 0.48418987 0.13872272]]


 76%|███████▌  | 77/101 [00:33<00:08,  2.92it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.7812,  5.4375,  7.6250,  ..., -0.0723, -0.0723,  2.6406]],
       device='cuda:0'), tensor([[ 4.3125,  6.0000,  8.0000,  ..., -0.1436, -0.1436,  1.7812]],
       device='cuda:0'))
score is tensor([[ 4.7812,  5.4375,  7.6250,  ..., -0.0723, -0.0723,  2.6406]],
       device='cuda:0')
logits is [[0.625746   0.2301991  0.14405492]]


 77%|███████▋  | 78/101 [00:33<00:07,  3.02it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.1875,  5.6875,  6.2500,  ..., -1.4922, -1.4922,  1.8984]],
       device='cuda:0'), tensor([[4.3125, 6.1562, 7.5625,  ..., 0.7891, 0.7891, 2.5000]],
       device='cuda:0'))
score is tensor([[ 4.1875,  5.6875,  6.2500,  ..., -1.4922, -1.4922,  1.8984]],
       device='cuda:0')
logits is [[0.4136063  0.36500627 0.2213875 ]]


 78%|███████▊  | 79/101 [00:33<00:07,  2.94it/s]

score (tensor([[ 3.7031,  4.9688,  5.8438,  ..., -4.2812, -4.2812,  1.8125]],
       device='cuda:0'), tensor([[4.0938, 5.1875, 7.4062,  ..., 1.7188, 1.7188, 1.6641]],
       device='cuda:0'))
score is tensor([[ 3.7031,  4.9688,  5.8438,  ..., -4.2812, -4.2812,  1.8125]],
       device='cuda:0')
logits is [[0.44207612 0.3901308  0.16779317]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 79%|███████▉  | 80/101 [00:34<00:08,  2.59it/s]

score (tensor([[ 3.5625,  4.8125,  5.6875,  ..., -3.6719, -3.6719,  1.8281]],
       device='cuda:0'), tensor([[4.0312, 5.3125, 6.9375,  ..., 1.4141, 1.4141, 1.3906]],
       device='cuda:0'))
score is tensor([[ 3.5625,  4.8125,  5.6875,  ..., -3.6719, -3.6719,  1.8281]],
       device='cuda:0')
logits is [[0.4781656  0.3286381  0.19319637]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 80%|████████  | 81/101 [00:34<00:09,  2.19it/s]

score (tensor([[ 4.7188,  5.6250,  5.2812,  ..., -6.0000, -6.0000,  0.2520]],
       device='cuda:0'), tensor([[ 3.3438,  6.0625,  5.4375,  ...,  1.5000,  1.5000, -1.1250]],
       device='cuda:0'))
score is tensor([[ 4.7188,  5.6250,  5.2812,  ..., -6.0000, -6.0000,  0.2520]],
       device='cuda:0')
logits is [[0.18991716 0.6424815  0.16760132]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 81%|████████  | 82/101 [00:35<00:09,  2.04it/s]

score (tensor([[ 4.9062,  4.8750,  6.2500,  ..., -3.5938, -3.5938,  2.2031]],
       device='cuda:0'), tensor([[5.0000, 5.5000, 7.6250,  ..., 2.6406, 2.6406, 0.8828]],
       device='cuda:0'))
score is tensor([[ 4.9062,  4.8750,  6.2500,  ..., -3.5938, -3.5938,  2.2031]],
       device='cuda:0')
logits is [[0.23589431 0.6825828  0.08152289]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 82%|████████▏ | 83/101 [00:36<00:08,  2.04it/s]

score (tensor([[ 4.1562,  4.8750,  5.6875,  ..., -1.5625, -1.5625,  2.5625]],
       device='cuda:0'), tensor([[4.0938, 5.3750, 7.1562,  ..., 0.9258, 0.9258, 2.7188]],
       device='cuda:0'))
score is tensor([[ 4.1562,  4.8750,  5.6875,  ..., -1.5625, -1.5625,  2.5625]],
       device='cuda:0')
logits is [[0.5867823  0.1905003  0.22271743]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 83%|████████▎ | 84/101 [00:36<00:08,  2.04it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.9062,  6.0938,  6.4062,  ..., -3.1719, -3.1719,  3.0312]],
       device='cuda:0'), tensor([[ 4.3125,  6.9688,  8.0625,  ..., -0.2178, -0.2178,  2.5625]],
       device='cuda:0'))
score is tensor([[ 3.9062,  6.0938,  6.4062,  ..., -3.1719, -3.1719,  3.0312]],
       device='cuda:0')
logits is [[0.4841051  0.34328192 0.172613  ]]


 84%|████████▍ | 85/101 [00:36<00:07,  2.09it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.8438,  4.8125,  6.0312,  ..., -5.6875, -5.6875,  1.2422]],
       device='cuda:0'), tensor([[3.1406, 4.6875, 6.7188,  ..., 2.1094, 2.1094, 1.8281]],
       device='cuda:0'))
score is tensor([[ 3.8438,  4.8125,  6.0312,  ..., -5.6875, -5.6875,  1.2422]],
       device='cuda:0')
logits is [[0.23921071 0.55618155 0.20460777]]


 85%|████████▌ | 86/101 [00:37<00:06,  2.42it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.4062,  5.5938,  6.3750,  ..., -3.9844, -3.9844,  1.5000]],
       device='cuda:0'), tensor([[4.5312, 5.7188, 7.4062,  ..., 0.5859, 0.5859, 3.0000]],
       device='cuda:0'))
score is tensor([[ 4.4062,  5.5938,  6.3750,  ..., -3.9844, -3.9844,  1.5000]],
       device='cuda:0')
logits is [[0.35763973 0.3807055  0.26165482]]


 86%|████████▌ | 87/101 [00:37<00:05,  2.71it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 5.0938,  6.9062,  7.5625,  ..., -2.7969, -2.7969,  2.9062]],
       device='cuda:0'), tensor([[ 4.6562,  7.3750,  7.8750,  ..., -2.2031, -2.2031,  2.5469]],
       device='cuda:0'))
score is tensor([[ 5.0938,  6.9062,  7.5625,  ..., -2.7969, -2.7969,  2.9062]],
       device='cuda:0')
logits is [[0.58266294 0.18916295 0.22817408]]


 87%|████████▋ | 88/101 [00:37<00:04,  3.08it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.5156,  5.1562,  6.5938,  ..., -4.6250, -4.6250,  2.3438]],
       device='cuda:0'), tensor([[3.1719, 5.4688, 7.4375,  ..., 3.6094, 3.6094, 1.8906]],
       device='cuda:0'))
score is tensor([[ 3.5156,  5.1562,  6.5938,  ..., -4.6250, -4.6250,  2.3438]],
       device='cuda:0')
logits is [[0.6020486  0.27563792 0.12231359]]


 88%|████████▊ | 89/101 [00:38<00:03,  3.12it/s]

score (tensor([[ 4.3125,  5.5000,  6.1250,  ..., -5.6250, -5.6250,  2.4219]],
       device='cuda:0'), tensor([[ 4.0625,  5.5000,  7.3438,  ..., -0.0688, -0.0688,  2.7344]],
       device='cuda:0'))
score is tensor([[ 4.3125,  5.5000,  6.1250,  ..., -5.6250, -5.6250,  2.4219]],
       device='cuda:0')
logits is [[0.28095952 0.4632239  0.25581655]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 89%|████████▉ | 90/101 [00:38<00:04,  2.65it/s]

score (tensor([[ 4.1250,  5.4062,  6.1562,  ..., -5.8125, -5.8125,  1.9609]],
       device='cuda:0'), tensor([[ 4.1562,  5.5000,  7.7188,  ..., -3.0156, -3.0156,  2.0625]],
       device='cuda:0'))
score is tensor([[ 4.1250,  5.4062,  6.1562,  ..., -5.8125, -5.8125,  1.9609]],
       device='cuda:0')
logits is [[0.32375795 0.39052665 0.2857154 ]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 90%|█████████ | 91/101 [00:39<00:04,  2.03it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[4.5625, 6.5312, 8.3750,  ..., 1.8047, 1.8047, 3.1406]],
       device='cuda:0'), tensor([[4.4062, 6.0938, 8.1875,  ..., 0.9805, 0.9805, 0.9688]],
       device='cuda:0'))
score is tensor([[4.5625, 6.5312, 8.3750,  ..., 1.8047, 1.8047, 3.1406]],
       device='cuda:0')
logits is [[0.73596764 0.127892   0.13614033]]


 91%|█████████ | 92/101 [00:39<00:03,  2.41it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.9219,  4.8125,  5.4375,  ..., -5.2812, -5.2812,  1.5781]],
       device='cuda:0'), tensor([[ 4.5625,  5.7500,  7.7500,  ..., -0.4355, -0.4355,  2.1250]],
       device='cuda:0'))
score is tensor([[ 3.9219,  4.8125,  5.4375,  ..., -5.2812, -5.2812,  1.5781]],
       device='cuda:0')
logits is [[0.44744617 0.46164966 0.09090421]]


 92%|█████████▏| 93/101 [00:39<00:02,  2.75it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.3125,  5.5625,  6.7188,  ..., -3.3594, -3.3594,  3.7344]],
       device='cuda:0'), tensor([[ 4.5938,  5.7188,  7.5625,  ..., -0.9609, -0.9609,  3.3594]],
       device='cuda:0'))
score is tensor([[ 4.3125,  5.5625,  6.7188,  ..., -3.3594, -3.3594,  3.7344]],
       device='cuda:0')
logits is [[0.46435928 0.30932996 0.22631064]]


 93%|█████████▎| 94/101 [00:40<00:02,  2.97it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.4062,  5.8438,  6.6875,  ..., -4.7500, -4.7500,  2.2969]],
       device='cuda:0'), tensor([[4.3438, 6.0312, 7.5625,  ..., 1.3281, 1.3281, 2.4688]],
       device='cuda:0'))
score is tensor([[ 4.4062,  5.8438,  6.6875,  ..., -4.7500, -4.7500,  2.2969]],
       device='cuda:0')
logits is [[0.2786964  0.45949265 0.26181105]]


 94%|█████████▍| 95/101 [00:40<00:02,  2.79it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.0625,  5.8438,  7.1875,  ..., -3.0312, -3.0312,  3.7188]],
       device='cuda:0'), tensor([[4.0938, 5.9375, 8.1250,  ..., 0.4395, 0.4395, 3.1406]],
       device='cuda:0'))
score is tensor([[ 4.0625,  5.8438,  7.1875,  ..., -3.0312, -3.0312,  3.7188]],
       device='cuda:0')
logits is [[0.41687304 0.2375271  0.34559986]]


 95%|█████████▌| 96/101 [00:40<00:01,  3.11it/s]

score (tensor([[ 4.0625,  5.3125,  6.0938,  ..., -4.0625, -4.0625,  2.5469]],
       device='cuda:0'), tensor([[ 4.5312,  6.0938,  8.1250,  ..., -0.6562, -0.6562,  2.1562]],
       device='cuda:0'))
score is tensor([[ 4.0625,  5.3125,  6.0938,  ..., -4.0625, -4.0625,  2.5469]],
       device='cuda:0')
logits is [[0.48631787 0.35579777 0.1578843 ]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 96%|█████████▌| 97/101 [00:41<00:01,  2.41it/s]

score (tensor([[ 4.7500,  5.1250,  6.9688,  ..., -1.1797, -1.1797,  1.7500]],
       device='cuda:0'), tensor([[5.7188, 6.0000, 8.1875,  ..., 2.0625, 2.0625, 1.6953]],
       device='cuda:0'))
score is tensor([[ 4.7500,  5.1250,  6.9688,  ..., -1.1797, -1.1797,  1.7500]],
       device='cuda:0')
logits is [[0.6780169  0.20042141 0.12156173]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 97%|█████████▋| 98/101 [00:41<00:01,  2.07it/s]

score (tensor([[ 4.8750,  6.0625,  7.4062,  ..., -4.3125, -4.3125,  0.6094]],
       device='cuda:0'), tensor([[4.6875, 5.7500, 8.0625,  ..., 3.0469, 3.0469, 1.4766]],
       device='cuda:0'))
score is tensor([[ 4.8750,  6.0625,  7.4062,  ..., -4.3125, -4.3125,  0.6094]],
       device='cuda:0')
logits is [[0.55546045 0.24648409 0.19805552]]


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
 98%|█████████▊| 99/101 [00:42<00:01,  1.97it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 4.7812,  8.0625,  7.1562,  ..., -2.1719, -2.1719,  0.4961]],
       device='cuda:0'), tensor([[4.5938, 8.0000, 7.1875,  ..., 2.5625, 2.5625, 1.8047]],
       device='cuda:0'))
score is tensor([[ 4.7812,  8.0625,  7.1562,  ..., -2.1719, -2.1719,  0.4961]],
       device='cuda:0')
logits is [[0.6390814  0.18891211 0.17200643]]


 99%|█████████▉| 100/101 [00:42<00:00,  2.10it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


score (tensor([[ 3.6094,  2.8750,  3.8906,  ..., -0.1699, -0.1699,  0.3770]],
       device='cuda:0'), tensor([[-0.5781,  0.1895, -0.3516,  ...,  0.8555,  0.8555,  2.4844]],
       device='cuda:0'))
score is tensor([[ 3.6094,  2.8750,  3.8906,  ..., -0.1699, -0.1699,  0.3770]],
       device='cuda:0')
logits is [[0.67767674 0.22000942 0.10231388]]


100%|██████████| 101/101 [00:43<00:00,  2.34it/s]

score (tensor([[ 4.4062,  5.6562,  6.3750,  ..., -5.3125, -5.3125,  2.5781]],
       device='cuda:0'), tensor([[ 3.7969,  5.4375,  7.1875,  ..., -0.0232, -0.0231,  2.4844]],
       device='cuda:0'))
score is tensor([[ 4.4062,  5.6562,  6.3750,  ..., -5.3125, -5.3125,  2.5781]],
       device='cuda:0')
logits is [[0.28502122 0.4019443  0.31303453]]





In [37]:
# 提取数据
processed_data = []
for item in sub_pred:
    item = item[0]
    id = item[0].item()  # 获取id
    array_values = item[1].tolist()  # 获取array并转换为列表
    processed_data.append([id] + array_values)

In [41]:
new_columns = ['id', 'winner_model_a', 'winner_model_b', 'winner_tie']
df = pd.DataFrame(processed_data, columns=new_columns)

In [43]:
df = df.groupby('id').mean().reset_index()

Unnamed: 0,id,winner_model_a,winner_model_b,winner_tie
0,30192,0.471409,0.377624,0.150967
1,1256092,0.568546,0.268562,0.162891
2,3258431,0.513956,0.387954,0.098090
3,4186011,0.325846,0.554282,0.119872
4,5717448,0.171352,0.721419,0.107229
...,...,...,...,...
79,119110638,0.416873,0.237527,0.345600
80,120519609,0.486318,0.355798,0.157884
81,122160973,0.624186,0.211939,0.163875
82,124418647,0.677677,0.220009,0.102314


In [None]:
prediciton = np.vstack(sub_pred)

In [4]:
from sklearn.metrics import log_loss

In [8]:
prediction = np.array(pd.read_csv("inference_on_test.csv"))

In [9]:
log_loss(test.label, prediction)



1.041973668626307

In [None]:
3.176938522630114

In [None]:
test['label'][:12]