In [None]:
import os, sys
from openpyxl import load_workbook
import pandas as pd, numpy as np
from argparse import Namespace
from datetime import datetime
import evaluate
import pandas as pd
import pickle
import re, json
from datasets import Dataset
from transformers import AutoModelForMaskedLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling, GPT2LMHeadModel
from datasets import load_dataset
from transformers import AutoTokenizer, pipeline, AutoModelForSequenceClassification, TrainingArguments
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer, RewardTrainer, RewardConfig, get_peft_config, SFTConfig
from tqdm import tqdm
import transformers
import torch
from peft import LoraConfig, TaskType
import random
from functools import partial

# Add the ~/myUtil directory to sys.path
sys.path.append(os.path.expanduser('~/'))
from myUtils.timeUtils import TimeUtils
from myUtils.IOUtils import IOUtils
from KoreanNumber import num2kr





In [None]:
class MyPPOTrainer(PPOTrainer):
    """PPOTrainer whose dataloader keeps every dataset column.

    The parent's documented behaviour is to preprocess Hugging Face datasets
    by removing the columns the model does not use. This override skips that
    step so that extra columns (for example the raw query text) stay
    available in each batch.
    """

    @staticmethod
    def _collate_as_lists(samples):
        """Turn a list of row dicts into a dict of per-column lists.

        Unlike torch's default collate function this neither stacks nor
        transposes list-valued columns, so rows of different lengths do not
        raise "each element in list of batch should be of equal size".
        """
        return {key: [sample[key] for sample in samples] for key in samples[0]}

    def prepare_dataloader(self, dataset, data_collator=None):
        """
        Prepare the dataloader for training.

        Args:
            dataset (Union[`torch.utils.data.Dataset`, `datasets.Dataset`]):
                PyTorch dataset or Hugging Face dataset. No column is removed.
            data_collator (Optional[function]):
                Data collator function. When omitted, rows are gathered into
                a dict of lists (one list per column).

        Returns:
            `torch.utils.data.DataLoader`: shuffled dataloader that drops the
            last incomplete batch.
        """
        if data_collator is None:
            data_collator = self._collate_as_lists

        return torch.utils.data.DataLoader(
            dataset,
            batch_size=self.config.batch_size,
            collate_fn=data_collator,
            shuffle=True,
            drop_last=True,
        )
        
def _num_to_str(data):
    """Convert numbers to the Korean number-word encoding used by the models.

    Args:
        data: A single value (``str`` or integer) or an iterable of values.
            A single value is wrapped into a one-element list.

    Returns:
        list[str]: One string per input value. Each value is passed through
        ``num2kr.num2kr(value, 1)`` and then every "십" that is followed by a
        character other than "만" gets a "만" inserted right after it.
    """
    # A bare integer is not iterable; wrap it the same way a bare string is.
    if isinstance(data, (str, int, np.integer)):
        data = [data]

    # NOTE(review): a "십" at the very end of a word is not followed by any
    # character and is therefore left untouched by this pattern - confirm
    # that this is intended for the price range in use.
    ten_without_man = re.compile(r"십([^만])")

    words = [num2kr.num2kr(value, 1) for value in data]
    return [ten_without_man.sub(r"십만\1", word) for word in words]


def preprocess(args):
    """Build (or load from cache) the list of per-date closing-price series.

    Args:
        args: Namespace providing
            ``name_processed_files`` (path of the pickle cache),
            ``dir_files`` (directory holding the Excel workbooks) and
            ``columns_need`` (columns to keep; must include '일자' and '종가').

    Returns:
        list[list]: For every date of every workbook, the list of '종가'
        values is split in two halves; all first halves come first, followed
        by all second halves.
    """
    if os.path.isfile(args.name_processed_files):
        # NOTE: pickle executes arbitrary code on load; only point this at a
        # cache file written by this function.
        with open(args.name_processed_files, "rb") as f:
            return pickle.load(f)

    def _parse_date(value):
        """Parse a 'YYYY/MM/DD' cell; empty or missing cells become None."""
        if isinstance(value, datetime):
            # Date-formatted cells may already arrive as datetime objects.
            return value
        if pd.notna(value) and value != '':
            return datetime.strptime(value, "%Y/%m/%d")
        return None

    data = []
    # os.listdir order is arbitrary; sorting keeps the cached result (and any
    # seeded shuffle applied to it later) reproducible across machines.
    for file in sorted(os.listdir(args.dir_files)):
        load_wb = load_workbook(os.path.join(args.dir_files, file), data_only=True)
        load_ws = load_wb[load_wb.sheetnames[0]]

        # First row is the header, every following row is a record.
        header = next(load_ws.iter_rows(min_row=1, max_row=1, values_only=True))
        rows = list(load_ws.iter_rows(min_row=2, values_only=True))

        frame = pd.DataFrame(rows, columns=list(header))
        # .copy() so the column assignment below works on an independent
        # frame instead of a slice of the full table.
        frame = frame[args.columns_need].copy()
        frame['일자'] = frame['일자'].map(_parse_date)
        frame = frame.dropna(subset=['일자'])
        # One list of closing prices per date.
        data.append(frame.groupby("일자")['종가'].apply(list).tolist())

    series = [prices for per_file in data for prices in per_file]
    data = [s[:len(s) // 2] for s in series] + [s[len(s) // 2:] for s in series]

    cache_dir = os.path.dirname(args.name_processed_files)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    with open(args.name_processed_files, "wb") as f:
        pickle.dump(data, f)

    return data

def get_distribution(data):
    """Return the mean absolute step between consecutive values.

    Args:
        data: Iterable of numeric sequences.

    Returns:
        float: Mean of ``abs(next - current)`` over every adjacent pair of
        every sequence, rounded to two decimals. ``0.0`` when no sequence
        holds at least two values (instead of dividing by zero).
    """
    steps = [
        abs(after - before)
        for series in data
        for before, after in zip(series, series[1:])
    ]
    if not steps:
        return 0.0
    return round(sum(steps) / len(steps), 2)


# pretrain/preproces.py로  preprocess된 데이터를 한 번 더 preprocess하는 모듈
def _str_to_num(list_x):
    digits = {'일': 1, '이': 2, '삼': 3, '사': 4, '오': 5, '육': 6, '칠': 7, '팔': 8, '구': 9}
    units = {'먕': 100000, '만': 10000, '천': 1000, '백': 100, '십': 10} # 먕: 10만
    if isinstance(list_x, str):
        list_x = [list_x]

    list_out = []
    for i in list_x:        
        # 중복된 숫자 및 단위 처리
        korean_str = re.sub(r'십만', "먕", i)  # 숫자 중복 제거

        units_only = re.sub(r"[일이삼사오육칠팔구]", "_", korean_str).split("_")            
        units_only = [units[j[0]] for j in units_only if j != ""]

        digits_only = re.sub(r"[^일이삼사오육칠팔구]", "_", korean_str).split("_")
        digits_only = [digits[j[0]] for j in digits_only if j != ""]
        
        out = 0
        for i, j in zip(units_only, digits_only):
            out += i * j
        
        list_out.append(out)
    
    return list_out

def set_reward_dataset(data):
    """Build the chosen/rejected preference dataset for reward modelling.

    Args:
        data: List of integer price sequences. A single integer is treated
            as one sequence holding one price.

    Returns:
        datasets.Dataset: Columns "chosen" (the original sequence rendered
        as space-joined Korean number words) and "rejected" (the same
        sequence with a random multiple of 10,000 added to every price,
        capped at 200,000).
    """
    if isinstance(data, int):
        # One sequence containing one price; a flat [data] would make the
        # per-sequence loop below iterate over a bare integer.
        data = [[data]]

    # Data that changes in the hundred-thousands or ten-thousands digit is
    # not appropriate, so rejected samples are shifted by 10,000 .. 100,000.
    # NOTE(review): a price that is already >= 200000 is clamped to 200000,
    # so "rejected" can equal "chosen" for such prices - confirm the range.
    rejected_data = [
        [
            min(price + int(round(random.randint(10000, 100000), -4)), 200000)
            for price in example
        ]
        for example in data
    ]

    rejected_data = [" ".join(_num_to_str(i)) for i in rejected_data]
    data = [" ".join(_num_to_str(i)) for i in data]
    return Dataset.from_dict({"chosen": data, "rejected": rejected_data})

def set_ppo_dataset(data):
    """Wrap price sequences as a one-column "query" dataset for PPO.

    Each sequence is converted with ``_num_to_str`` and its words are joined
    with single spaces.
    """
    queries = []
    for sequence in data:
        words = _num_to_str(sequence)
        queries.append(" ".join(words))
    return Dataset.from_dict({"query": queries})

def set_normal_dataset(data):
    """Wrap price sequences as a one-column "text" dataset for LM training.

    Each sequence is converted with ``_num_to_str`` and its words are joined
    with single spaces.
    """
    data = [" ".join(_num_to_str(i)) for i in data]
    return Dataset.from_dict({"text": data})


def set_reward_model(args):
    """Load the single-output sequence classifier used as reward model.

    Args:
        args: Namespace providing ``reward_checkpoint`` (model name or path).

    Returns:
        tuple: ``(reward_model, tokenizer)``. The tokenizer is guaranteed to
        have a pad token (eos is reused when none is defined) and the model
        config uses that same pad token id.
    """
    # Base model that gets trained for reward modelling.
    model_name = args.reward_checkpoint
    reward_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    # Take the id from the tokenizer so model and tokenizer always agree,
    # including when the tokenizer ships a pad token that differs from eos.
    reward_model.config.pad_token_id = tokenizer.pad_token_id

    return reward_model, tokenizer

def set_ppo_model(args):
    """Create the PPO config, value-head policy model and tokenizer.

    Model and tokenizer are both loaded from ``args.checkpoint`` (through
    the config's ``model_name``); the tokenizer pads with its eos token.

    Returns:
        tuple: ``(model, tokenizer, config)``.
    """
    ppo_config = PPOConfig(model_name=args.checkpoint, learning_rate=1e-7)
    checkpoint = ppo_config.model_name

    policy = AutoModelForCausalLMWithValueHead.from_pretrained(checkpoint)
    policy_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    policy_tokenizer.pad_token = policy_tokenizer.eos_token

    return policy, policy_tokenizer, ppo_config

def set_normal_model(args):
    """Load the GPT-2 LM from ``args.checkpoint`` with the joint tokenizer.

    The tokenizer comes from ``args.tokenizer_joint`` and pads with its eos
    token; the embedding matrix is resized to the tokenizer's length.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    lm = GPT2LMHeadModel.from_pretrained(args.checkpoint)
    joint_tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_joint)
    joint_tokenizer.pad_token = joint_tokenizer.eos_token

    vocab_size = len(joint_tokenizer)
    lm.resize_token_embeddings(vocab_size)

    return lm, joint_tokenizer



def set_reward_model_trainer(args, dataset_train, dataset_eval, model, tokenizer):
    """Tokenize the preference pairs and build the ``RewardTrainer``.

    Args:
        args: Namespace providing ``num_cores`` (processes used by ``map``)
            and ``output_reward_checkpoint`` (trainer output directory).
        dataset_train: Dataset with "chosen" and "rejected" text columns.
        dataset_eval: Evaluation dataset with the same columns.
        model: Reward model to train.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        RewardTrainer: Configured trainer (training is not started here).
    """
    encode_options = dict(padding="max_length", truncation=True, max_length=512, return_tensors="pt")

    def formatting_func(examples):
        # Encode both sides of each pair to fixed-length (512) inputs.
        chosen = tokenizer.batch_encode_plus(examples["chosen"], **encode_options)
        rejected = tokenizer.batch_encode_plus(examples["rejected"], **encode_options)
        encoded = {}
        encoded["input_ids_chosen"] = chosen["input_ids"]
        encoded["attention_mask_chosen"] = chosen["attention_mask"]
        encoded["input_ids_rejected"] = rejected["input_ids"]
        encoded["attention_mask_rejected"] = rejected["attention_mask"]
        return encoded

    map_options = {"batched": True, "num_proc": args.num_cores}
    train_split = dataset_train.map(formatting_func, **map_options)
    eval_split = dataset_eval.map(formatting_func, **map_options)

    # NOTE(review): `report_to=None` may select the library's default
    # reporting integrations rather than disabling them - confirm against
    # the installed transformers version.
    reward_config = RewardConfig(
        output_dir=args.output_reward_checkpoint,
        per_device_train_batch_size=16,
        evaluation_strategy="epoch",
        logging_steps=1,
        num_train_epochs=3,
        report_to=None,
    )

    return RewardTrainer(
        model=model,
        tokenizer=tokenizer,
        args=reward_config,
        train_dataset=train_split,
        eval_dataset=eval_split,
    )


def set_ppo_trainer(args, dataset, model, tokenizer, config):
    """Tokenize the PPO queries and build the trainer.

    Args:
        args: Unused; kept so all ``set_*_trainer`` helpers share a signature.
        dataset: Dataset with a "query" text column.
        model: Policy model with value head.
        tokenizer: Tokenizer matching ``model``.
        config: ``PPOConfig`` for the trainer.

    Returns:
        tuple: ``(ppo_trainer, dataset)`` where ``dataset`` additionally
        holds a 1-D "input_ids" column and is set to torch format, so each
        row yields a tensor of token ids next to the raw "query" string.
    """

    def tokenize(sample):
        # No `return_tensors` here: it would add a batch dimension of size 1
        # that is then stored as a nested list, whereas PPO expects one flat
        # sequence of ids per query.
        # NOTE(review): padding every query to 512 tokens means generation
        # continues after the padding - confirm this is intended.
        sample["input_ids"] = tokenizer.encode(
            sample["query"], padding="max_length", truncation=True, max_length=512
        )
        return sample

    def collate(samples):
        # Keep per-sample tensors/strings in plain lists (no stacking).
        return {key: [sample[key] for sample in samples] for key in samples[0]}

    dataset = dataset.map(tokenize, batched=False)
    dataset.set_format(type="torch")

    ppo_trainer = MyPPOTrainer(
        model=model,
        config=config,
        dataset=dataset,
        tokenizer=tokenizer,
        data_collator=collate,
    )

    return ppo_trainer, dataset


def set_normal_trainer(args, dataset_train, dataset_valid, model, tokenizer):
    """Tokenize the text datasets and build the plain causal-LM ``Trainer``.

    Args:
        args: Namespace providing ``output_normal_checkpoint`` (parent output
            directory) and optionally ``num_cores`` (processes used by
            ``map``; 4 when absent).
        dataset_train: Dataset with a "text" column.
        dataset_valid: Validation dataset with a "text" column.
        model: Causal language model to train.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        Trainer: Configured trainer. Its output directory is a new
        timestamped sub-directory of ``args.output_normal_checkpoint``, in
        which the training arguments are also dumped as
        ``trainingargs.json``.
    """
    accuracy = evaluate.load('accuracy')

    def metric(eval_pred, func):
        logits, labels = eval_pred
        # Assumes logits are (batch, sequence length, vocab).
        predictions = np.argmax(logits, axis=-1)
        # Causal LM: the logits at position t score the token at t + 1, so
        # predictions must be compared with the labels shifted by one.
        predictions = predictions[:, :-1]
        labels = labels[:, 1:]
        keep = labels != -100  # -100 marks positions excluded from the loss

        return func.compute(predictions=predictions[keep], references=labels[keep])

    def tokenize_func(examples):
        kwargs = {"padding": "max_length", "truncation": True, "max_length": 512, "return_tensors": "pt"}
        return tokenizer(examples['text'], **kwargs)

    num_proc = getattr(args, "num_cores", 4)
    training_data = dataset_train.map(tokenize_func, batched=True, num_proc=num_proc)
    valid_data = dataset_valid.map(tokenize_func, batched=True, num_proc=num_proc)

    training_data = training_data.remove_columns(['text'])
    valid_data = valid_data.remove_columns(['text'])

    od = os.path.join(args.output_normal_checkpoint, datetime.now().strftime("%m-%d-%H-%M-%S"))
    # Also creates missing parents; an existing directory is fine, any other
    # failure surfaces here instead of at the file write below.
    os.makedirs(od, exist_ok=True)

    trainingarguments = TrainingArguments(
        do_train = True,
        output_dir = od,
        evaluation_strategy = "steps", # necessary: change to step
        save_strategy = "steps",
        eval_steps = 50, # necessary: set step
        save_steps = 50,
        save_total_limit = 1,
        load_best_model_at_end = True, # necessary: must be True to use EarlyStoppingCallback
        metric_for_best_model = "accuracy",
        greater_is_better = True, # necessary: higher metric results better performance # default = True when metric_for_best_model is set
        num_train_epochs = 10,
        seed = 42,
        per_device_train_batch_size = 512,
        per_device_eval_batch_size = 512,

        # eval_accumulation_steps = 50,
        learning_rate = 1e-7,
        remove_unused_columns = False
    )

    with open(os.path.join(od, "trainingargs.json"), "w", encoding="utf-8") as f:
        f.write(json.dumps(trainingarguments.to_dict(), indent = 2, ensure_ascii = False))

    trainer = Trainer(
        model = model,
        args = trainingarguments,
        tokenizer = tokenizer,
        train_dataset = training_data,
        eval_dataset = valid_data,
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
        compute_metrics = partial(metric, func = accuracy)
    )

    return trainer


def ppo_trainer_train(ppoTrainer, dataset_train, tokenizer, reward_model, output_dir=None):
    """Run the PPO optimisation loop and save the resulting policy.

    Args:
        ppoTrainer: Trainer exposing ``prepare_dataloader``, ``generate``,
            ``step``, ``log_stats`` and ``save_pretrained``.
        dataset_train: Dataset with "query" (text) and "input_ids" columns.
        tokenizer: Tokenizer of the policy model, used to decode responses.
        reward_model: Callable that scores a list of texts and returns, per
            text, either a dict with a "score" key or a list of such dicts
            (e.g. a text-classification pipeline). A bare model object that
            expects token ids will not work here.
        output_dir: Directory for the trained policy. When omitted, the
            module-level ``args.output_rlhf_checkpoint`` is used, which is
            what this function always relied on.
    """
    # NOTE(review): no `max_new_tokens`/`max_length` is set, so the model's
    # default generation length applies - confirm it exceeds the query length.
    generation_kwargs = {
        "min_length": -1,
        "top_k": 0.0,
        "top_p": 1.0,
        "do_sample": True,
        "pad_token_id": tokenizer.eos_token_id,
    }

    def collate(samples):
        # Keep one entry per sample; the default collate would stack or
        # transpose the token-id column.
        return {key: [sample[key] for sample in samples] for key in samples[0]}

    def score_of(output):
        # A pipeline yields one dict (top label) or a list of dicts (all
        # labels) per text; the last entry is index 1 for two labels and
        # index 0 for a single-output reward head.
        if isinstance(output, dict):
            return output["score"]
        return output[-1]["score"]

    epochs = 10
    for _ in tqdm(range(epochs), "epoch: "):
        for batch in tqdm(ppoTrainer.prepare_dataloader(dataset_train, collate)):
            # PPO works on a list of 1-D query tensors; a stacked 2-D tensor
            # is rejected by `generate`.
            query_tensors = [torch.as_tensor(ids).reshape(-1) for ids in batch["input_ids"]]

            #### Get response from SFTModel
            # `return_prompt=False`: `step` expects the responses without
            # the query tokens in front.
            response_tensors = ppoTrainer.generate(query_tensors, return_prompt=False, **generation_kwargs)
            batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

            #### Compute reward score
            texts = [q + r for q, r in zip(batch["query"], batch["response"])]
            pipe_outputs = reward_model(texts)
            rewards = [torch.tensor(score_of(output)) for output in pipe_outputs]

            #### Run PPO step
            stats = ppoTrainer.step(query_tensors, response_tensors, rewards)
            ppoTrainer.log_stats(stats, batch, rewards)

    #### Save model
    if output_dir is None:
        # Backward-compatible fallback to the module-level namespace.
        output_dir = args.output_rlhf_checkpoint
    ppoTrainer.save_pretrained(output_dir)


def generate_decode(args, model_checkpoint, dataset_test, prompt_len=10):
    """Generate continuations for the test prompts with a trained checkpoint.

    Every "text" entry is split on single spaces; the first ``prompt_len``
    words form the prompt and the remaining words are the gold continuation.
    The model greedily generates as many tokens as the gold continuation
    tokenizes to.

    Args:
        args: Unused; kept for a uniform call signature.
        model_checkpoint: Path or name of a causal-LM checkpoint that also
            contains its tokenizer.
        dataset_test: Iterable of rows with a "text" field.
        prompt_len: Number of leading words used as prompt.

    Returns:
        tuple[list[str], list[str]]: ``(decoded_outputs, gold_value)`` with
        one generated continuation and one gold continuation per row.
    """
    # Local import: the checkpoints are causal language models, which the
    # masked-LM auto class cannot load.
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(model_checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

    decoded_outputs = []
    gold_value = []
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():  # Disable gradient calculation for efficiency
        for example in dataset_test:
            words = example['text'].split(" ")
            prompt = " ".join(words[:prompt_len])
            gold = " ".join(words[prompt_len:])
            gold_value.append(gold)

            n_new_tokens = len(tokenizer(gold)['input_ids'])
            if n_new_tokens == 0:
                # Nothing to predict for this row.
                decoded_outputs.append("")
                continue

            input_ids = tokenizer(prompt, truncation=True, return_tensors="pt")['input_ids']
            generated = model.generate(
                input_ids,
                max_new_tokens=n_new_tokens,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

            # Drop the prompt tokens and decode only the continuation.
            continuation = generated[0, input_ids.shape[1]:]
            decoded_outputs.append(tokenizer.decode(continuation, skip_special_tokens=True))

    return decoded_outputs, gold_value

def calculate_gap(args, decoded_outputs, gold_value):
    """Sum the absolute differences between predicted and gold numbers.

    Args:
        args: Unused; kept for a uniform call signature.
        decoded_outputs: Iterable of predicted texts (whitespace-separated
            Korean number words).
        gold_value: Iterable of gold texts in the same format.

    Returns:
        int: Sum over all text pairs of ``abs(predicted - gold)`` for the
        numbers at matching positions. Numbers beyond the shorter of the two
        sequences are ignored.
    """
    total_gap = 0
    for predicted_text, gold_text in zip(decoded_outputs, gold_value):
        # Decode word by word; passing the whole text at once would parse
        # the complete sequence as a single number.
        predicted = _str_to_num(predicted_text.split())
        gold = _str_to_num(gold_text.split())
        # zip stops at the shorter sequence, so no explicit truncation is
        # needed.
        total_gap += sum(abs(p - g) for p, g in zip(predicted, gold))

    return total_gap
    



@TimeUtils.consumedTime_decorator # the arguments should only be a single namespace object
def main(args):
    """Timed entry point; currently a no-op.

    The complete pipeline runs at module level inside the
    ``if __name__ == "__main__":`` block. The body that used to live here
    was commented out entirely, which left the function without a single
    statement and made the definition a syntax error.

    Args:
        args: Namespace with the run configuration (unused).
    """
    return None



In [None]:


if __name__ == "__main__":
    # Run configuration: data locations, checkpoints and output directories.
    dir_files = "/home/hyohyeongjang/2024aut_comprac/data/data_rlhf"
    name_processed_files = "/home/hyohyeongjang/2024aut_comprac/data/data_rlhf_processed/data_rlhf.pk"
    columns_need = ['일자', '종가']
    checkpoint = "/home/hyohyeongjang/2024aut_comprac/weights/model_joint/10-15-02-51-17/checkpoint-50"
    tokenizer_joint = "/home/hyohyeongjang/2024aut_comprac/tokenizers/tokenizer_joint_jaeyoon"
    reward_model_checkpoint = "distilroberta-base"
    # already trained
    output_reward_checkpoint = "/home/hyohyeongjang/2024aut_comprac/weights/reward_model/checkpoint-936"
    output_rlhf_checkpoint = "/home/hyohyeongjang/2024aut_comprac/weights/rlhf_result"
    output_normal_checkpoint = "/home/hyohyeongjang/2024aut_comprac/weights/no_rlhf_result"
    num_cores = 4

    args = Namespace(
        dir_files = dir_files,
        name_processed_files = name_processed_files,
        columns_need = columns_need,
        checkpoint = checkpoint,
        tokenizer_joint = tokenizer_joint,
        reward_checkpoint = reward_model_checkpoint,
        output_reward_checkpoint = output_reward_checkpoint,
        output_rlhf_checkpoint = output_rlhf_checkpoint,
        output_normal_checkpoint = output_normal_checkpoint,
        num_cores = num_cores,
    )

    # Load the price series and shuffle them reproducibly.
    random.seed(42)
    data = preprocess(args)
    random.shuffle(data)

    # Sanity checks kept for interactive inspection: mean step size and an
    # encode/decode round trip of one sample.
    dist = get_distribution(data)
    x = _num_to_str(data[1234])
    decode_sample = _str_to_num(x)

    # First half of the data trains the reward model, second half is shared
    # by the PPO run and the plain LM baseline.
    half = int(len(data)/2)
    dataset_reward = set_reward_dataset(data[:half])
    dataset_reward_train_test = dataset_reward.train_test_split(0.2)
    dataset_reward_train = dataset_reward_train_test['train']
    dataset_reward_valid = dataset_reward_train_test['test']

    dataset_ppo = set_ppo_dataset(data[half:])
    dataset_ppo_train_test = dataset_ppo.train_test_split(0.2)
    dataset_ppo_train = dataset_ppo_train_test['train']
    dataset_ppo_valid_test = dataset_ppo_train_test['test'].train_test_split(0.5)
    dataset_ppo_valid, dataset_ppo_test = dataset_ppo_valid_test['train'], dataset_ppo_valid_test['test']

    dataset_normal = set_normal_dataset(data[half:])
    dataset_normal_train_test = dataset_normal.train_test_split(0.2)
    dataset_normal_train = dataset_normal_train_test['train']
    dataset_normal_valid_test = dataset_normal_train_test['test'].train_test_split(0.5)
    dataset_normal_valid, dataset_normal_test = dataset_normal_valid_test['train'], dataset_normal_valid_test['test']

    normalModel, normalTokenizer = set_normal_model(args)
    normalTrainer = set_normal_trainer(args, dataset_normal_train, dataset_normal_valid, normalModel, normalTokenizer)
    reward_model, reward_tokenizer = set_reward_model(args)
    rewardTrainer = set_reward_model_trainer(args, dataset_reward_train, dataset_reward_valid, reward_model, reward_tokenizer)
    ppoModel, ppoTokenizer, ppoConfig = set_ppo_model(args)
    ppoTrainer, ppoDataset = set_ppo_trainer(args, dataset_ppo_train, ppoModel, ppoTokenizer, ppoConfig)

    # NOTE(review): reward training is disabled and the reward model is
    # loaded from `reward_checkpoint` ("distilroberta-base"), not from the
    # trained `output_reward_checkpoint`, so its classification head is
    # freshly initialised - confirm which checkpoint should score the PPO run.
    # rewardTrainer.train()

    # The PPO loop scores raw texts, so the reward model has to be wrapped
    # in a pipeline; the bare model only accepts token ids. Raw logits of
    # all labels are returned and over-long texts are truncated.
    reward_pipe = pipeline(
        "text-classification",
        model=reward_model,
        tokenizer=reward_tokenizer,
        top_k=None,
        function_to_apply="none",
        truncation=True,
    )
    ppo_trainer_train(ppoTrainer, ppoDataset, ppoTokenizer, reward_pipe)

    normalTrainer.train()
    # The trainer writes its checkpoints into a timestamped sub-directory;
    # store the final model (and tokenizer) where the decoding step below
    # loads it from.
    normalTrainer.save_model(args.output_normal_checkpoint)

    output_rlhf, gold_rlfh = generate_decode(args, args.output_rlhf_checkpoint, dataset_normal_test)
    output_normal, gold_normal = generate_decode(args, args.output_normal_checkpoint, dataset_normal_test)

    print(calculate_gap(args, output_rlhf, gold_rlfh))
    print(calculate_gap(args, output_normal, gold_normal))

Job starts at 2024-11-11 19:28:54


Map (num_proc=4):   0%|          | 0/4980 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/622 [00:00<?, ? examples/s]

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map (num_proc=4):   0%|          | 0/4980 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/1245 [00:00<?, ? examples/s]

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Map:   0%|          | 0/4980 [00:00<?, ? examples/s]

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


---------------- Dataset({
    features: ['query', 'input_ids'],
    num_rows: 4980
})
result------------------------- (AutoModelForCausalLMWithValueHead(
  (pretrained_model): GPT2LMHeadModel(
    (transformer): GPT2Model(
      (wte): Embedding(54, 128)
      (wpe): Embedding(1024, 128)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-3): 4 x GPT2Block(
          (ln_1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2SdpaAttention(
            (c_attn): Conv1D()
            (c_proj): Conv1D()
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D()
            (c_proj): Conv1D()
            (act): NewGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((128,), e

epoch:   0%|          | 0/10 [00:00<?, ?it/s]

---------------- Dataset({
    features: ['query', 'input_ids'],
    num_rows: 4980
})


  0%|          | 0/38 [00:00<?, ?it/s]
epoch:   0%|          | 0/10 [00:00<?, ?it/s]


RuntimeError: each element in list of batch should be of equal size