In [1]:
# imports
import pandas as pd
import os, ipdb
import random, evaluate

import random
import string

# Fine-Tune Llama2-7b on custom dataset
import os, ipdb
from dataclasses import dataclass, field
from typing import Optional
import numpy as np
import torch, random
from datasets import DatasetDict, Dataset, load_dataset
from peft import AutoPeftModelForCausalLM, LoraConfig
from tqdm import tqdm
import wandb
from transformers import AutoModelForCausalLM, AutoTokenizer\
, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, TrainerCallback, pipeline

from trl import SFTTrainer
from trl.trainer import ConstantLengthDataset

2024-02-10 03:41:08.924401: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-10 03:41:09.504044: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Seed every random number generator this notebook touches so that repeated
# runs are comparable. torch.manual_seed covers the CPU generator, which
# torch.cuda.manual_seed_all alone leaves unseeded.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Project-local evaluation helpers (must be importable from the kernel's path).
from evaluation_metrics import Metrics, THRESHOLD

# Silence the tokenizers fork warning; set to "true" if parallel tokenization is wanted.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Device on which prompt tensors are placed before generation.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _arg(default, help_text):
    """Return a dataclass field with ``default`` and a ``{"help": help_text}`` metadata dict."""
    return field(default=default, metadata={"help": help_text})


@dataclass
class ScriptArguments:
    """Settings container handed to ``HfArgumentParser``; each field carries its own help text."""

    # Model and experiment tracking
    model_name: Optional[str] = _arg("meta-llama/Llama-2-7b-hf", "the model name")
    log_with: Optional[str] = _arg("wandb", "use 'wandb' to log with wandb")

    # Dataset selection and loading
    dataset_name: Optional[str] = _arg("lvwerra/stack-exchange-paired", "the dataset name")
    subset: Optional[str] = _arg("data/finetune", "the subset to use")
    split: Optional[str] = _arg("train", "the split to use")
    size_valid_set: Optional[int] = _arg(4000, "the size of the validation set")
    streaming: Optional[bool] = _arg(True, "whether to stream the dataset")
    shuffle_buffer: Optional[int] = _arg(5000, "the shuffle buffer size")
    seq_length: Optional[int] = _arg(1024, "the sequence length")
    num_workers: Optional[int] = _arg(4, "the number of workers")

    # Training loop
    max_steps: Optional[int] = _arg(500, "the maximum number of sgd steps")
    logging_steps: Optional[int] = _arg(10, "the logging frequency")
    save_steps: Optional[int] = _arg(10, "the saving frequency")
    per_device_train_batch_size: Optional[int] = _arg(4, "the per device train batch size")
    per_device_eval_batch_size: Optional[int] = _arg(1, "the per device eval batch size")
    gradient_accumulation_steps: Optional[int] = _arg(2, "the gradient accumulation steps")
    gradient_checkpointing: Optional[bool] = _arg(True, "whether to use gradient checkpointing")
    group_by_length: Optional[bool] = _arg(False, "whether to group by length")
    packing: Optional[bool] = _arg(True, "whether to use packing for SFTTrainer")

    # LoRA adapter
    lora_alpha: Optional[float] = _arg(16, "the lora alpha parameter")
    lora_dropout: Optional[float] = _arg(0.05, "the lora dropout parameter")
    lora_r: Optional[int] = _arg(8, "the lora r parameter")

    # Optimisation
    learning_rate: Optional[float] = _arg(1e-4, "the learning rate")
    lr_scheduler_type: Optional[str] = _arg("cosine", "the lr scheduler type")
    num_warmup_steps: Optional[int] = _arg(100, "the number of warmup steps")
    weight_decay: Optional[float] = _arg(0.05, "the weight decay")
    optimizer_type: Optional[str] = _arg("paged_adamw_32bit", "the optimizer type")

    # Output
    output_dir: Optional[str] = _arg("./results", "the output directory")
    log_freq: Optional[int] = _arg(1, "the logging frequency")


# Build the settings object from the dataclass defaults only: the empty
# argument list keeps the parser from reading the kernel's own command line.
parser = HfArgumentParser(ScriptArguments)
parsed_dataclasses = parser.parse_args_into_dataclasses([])
script_args = parsed_dataclasses[0]

# The two options are rejected in combination, so fail before any work starts.
if script_args.packing and script_args.group_by_length:
    raise ValueError("Cannot use both packing and group by length")

def prepare_sample_text(example):
    """Render one dataset record as a ``Question: ... Answer: ...`` training string.

    Args:
        example: mapping that provides the ``'prompt'`` and ``'answer'`` entries.

    Returns:
        The prompt and the answer joined by a blank line, each behind its label.
    """
    template = "Question: {prompt}\n\nAnswer: {answer}"
    return template.format(prompt=example['prompt'], answer=example['answer'])
    



In [3]:

# Evaluation run configuration.
# Each setting is assigned exactly once; earlier revisions of this cell set
# several of them twice with conflicting values (and briefly stored an int in
# evaluation_strategy), which made the effective configuration hard to read.

script_args.size = "7b"
script_args.seq_length = 2400

# Checkpoint sub-directory (inside output_dir) that is loaded for evaluation.
script_args.test_ckpt = "checkpoint-20000"  # "checkpoint-5000"

# Training and evaluation both point at the same on-disk fold.
script_args.test_dataset = "../data/LLLM_AUGMENTED_SUMMARIZED_WITH_ID_ZEROSHOT_TDMS_50_PERCENT/fold1"
script_args.dataset_name = script_args.test_dataset
# Alternative folds (set both test_dataset and dataset_name to the same path):
#   "./data/LLLM_AUGMENTED_SUMMARIZED_ZEROSHOT_TDMS_50_PERCENT_LONG/fold1"
#   "./data/LLLM_AUGMENTED_SUMMARIZED_ZEROSHOT_TDMS_50_PERCENT_DOCTEAT/fold1"

# Base model and the directories/names derived from it.
script_args.model_name = "meta-llama/Llama-2-7b-hf"
script_args.output_dir = f"../model_ckpt/augmented_summ_with_id_zeroshot_llama2_{script_args.size}_tdms_f1_50_percent_seq_len_{script_args.seq_length}"
script_args.run_name = f"eval_sft_augmented_summ_with_id_zeroshot_llama2_{script_args.size}_tdms_50_percent_seq_len_{script_args.seq_length}"
# Mistral variant:
# script_args.model_name = "mistralai/Mistral-7B-v0.1"
# script_args.output_dir = f"../model_ckpt/augmented_summ_with_id_zeroshot_mistralai_{script_args.size}_tdms_f1_50_percent_seq_len_{script_args.seq_length}"
# script_args.run_name = f"eval_sft_augmented_summ_with_id_zeroshot_mistralai_{script_args.size}_tdms_50_percent_seq_len_{script_args.seq_length}"

# Batch sizes.
script_args.per_device_train_batch_size = 3
script_args.per_device_eval_batch_size = 2
script_args.gradient_accumulation_steps = 2

# Checkpointing / evaluation / logging cadence.
script_args.save_strategy = "steps"  # or "epoch"
script_args.evaluation_strategy = "steps"  # or "epoch"
script_args.save_steps = 1000
script_args.eval_steps = 1000
script_args.logging_steps = 1000
script_args.save_total_limit = 50

script_args.streaming = False
script_args.num_train_epochs = 5

script_args.random_test_sub = 500

# Dataset split that the evaluation cells read.
mode = "validation"
# mode = "zeroshot"

In [4]:
# Credentials must never live in the notebook: read the Hugging Face access
# token from the environment (export HF_TOKEN=... before starting the kernel).
# When the variable is unset, None is passed and the libraries fall back to
# whatever login is cached on this machine.
# NOTE(review): the token that used to be hardcoded here has been exposed and
# should be revoked.
hf_token = os.environ.get("HF_TOKEN")

# Fine-tuned adapter checkpoint; it also holds the tokenizer files.
checkpoint_dir = f"{script_args.output_dir}/{script_args.test_ckpt}"

model = AutoPeftModelForCausalLM.from_pretrained(
    checkpoint_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
    load_in_4bit=True,
    use_auth_token=hf_token,
)

# NOTE(review): use_cache=False is usually a training-time setting; confirm it
# is really wanted for generation, where the KV cache normally speeds decoding.
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(
    checkpoint_dir,
    use_auth_token=hf_token,
)

dataset = DatasetDict.load_from_disk(f"{script_args.test_dataset}")

# Fixed shuffle seed so the evaluation order is the same on every run.
valid_data = dataset[mode].shuffle(seed=42)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [None]:
# Prompts longer than this many tokens are not generated for; their sizes are
# recorded in idx_skip instead.
MAX_PROMPT_TOKENS = 5510
# NOTE(review): generation currently runs only for prompts LONGER than this
# many tokens. The saved cell output shows generations for much shorter
# prompts, so this gate looks like a leftover long-input probe; confirm
# before trusting preds/labels as a full evaluation set.
MIN_PROMPT_TOKENS = 5000

labels = []            # reference answers, aligned with preds
preds = []             # decoded model answers
idx_skip = []          # (token count, whitespace word count) of over-long prompts
processed_paper = []   # ids handled so far, in processing order
processed_ids = set()  # same ids, for O(1) duplicate checks (assumes ids are hashable)

for idx, valid_ex in tqdm(enumerate(valid_data), total=len(valid_data)):

    paper_id = valid_ex['id']
    # Each paper is handled at most once even if it appears in several rows.
    if paper_id in processed_ids:
        continue

    prompt = f"Question: {valid_ex['prompt']}"

    # Tokenize first and measure the length before moving anything to the
    # device, so over-long prompts are never transferred.
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    n_prompt_tokens = inputs.shape[1]

    if n_prompt_tokens > MAX_PROMPT_TOKENS:
        idx_skip.append((n_prompt_tokens, len(valid_ex['prompt'].split())))
        continue

    if n_prompt_tokens > MIN_PROMPT_TOKENS:

        print(f"inputs.shape: {inputs.shape}, input split length: {len(valid_ex['prompt'].split())}")

        inputs = inputs.to(device)
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=256,
            pad_token_id=tokenizer.eos_token_id,
        )
        predictions = tokenizer.decode(outputs[0])

        # Keep only the text after the last "Answer: " marker and drop the
        # end-of-sequence token text.
        preds.append(predictions.split("Answer: ")[-1].replace("</s>", ""))
        labels.append(valid_ex['answer'])

    processed_paper.append(paper_id)
    processed_ids.add(paper_id)

  0%|          | 0/12315 [00:00<?, ?it/s]

inputs.shape: torch.Size([1, 809]), input split length: 374


  0%|          | 28/12315 [00:01<11:10, 18.32it/s]

inputs.shape: torch.Size([1, 767]), input split length: 378


  0%|          | 55/12315 [00:02<07:36, 26.87it/s]

inputs.shape: torch.Size([1, 3291]), input split length: 1880


  1%|          | 76/12315 [00:04<12:07, 16.83it/s]

inputs.shape: torch.Size([1, 1214]), input split length: 733


  1%|          | 101/12315 [00:17<50:29,  4.03it/s]

inputs.shape: torch.Size([1, 482]), input split length: 286


  1%|          | 104/12315 [00:18<49:44,  4.09it/s]

inputs.shape: torch.Size([1, 2370]), input split length: 1464


  1%|          | 113/12315 [00:31<1:45:44,  1.92it/s]

inputs.shape: torch.Size([1, 575]), input split length: 377


  1%|          | 115/12315 [00:44<3:03:48,  1.11it/s]

Validation index 145 skipped because input.shape: torch.Size([1, 5526]), input split length: 2809
Validation index 160 skipped because input.shape: torch.Size([1, 7599]), input split length: 3618
Validation index 182 skipped because input.shape: torch.Size([1, 6829]), input split length: 1929
inputs.shape: torch.Size([1, 553]), input split length: 259


  2%|▏         | 188/12315 [00:45<43:31,  4.64it/s]  

inputs.shape: torch.Size([1, 399]), input split length: 233


  2%|▏         | 193/12315 [00:46<44:43,  4.52it/s]

inputs.shape: torch.Size([1, 3765]), input split length: 2297


  2%|▏         | 197/12315 [00:49<49:38,  4.07it/s]

inputs.shape: torch.Size([1, 641]), input split length: 311


  2%|▏         | 202/12315 [00:49<46:51,  4.31it/s]

inputs.shape: torch.Size([1, 5120]), input split length: 2483


This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.
  2%|▏         | 222/12315 [01:06<1:34:39,  2.13it/s]

inputs.shape: torch.Size([1, 4969]), input split length: 1946


  2%|▏         | 230/12315 [01:21<2:29:03,  1.35it/s]

inputs.shape: torch.Size([1, 1897]), input split length: 998


  2%|▏         | 241/12315 [01:35<2:55:11,  1.15it/s]

inputs.shape: torch.Size([1, 507]), input split length: 292


  2%|▏         | 245/12315 [01:35<2:37:17,  1.28it/s]

inputs.shape: torch.Size([1, 3179]), input split length: 1837


  2%|▏         | 262/12315 [01:37<1:39:38,  2.02it/s]

inputs.shape: torch.Size([1, 1144]), input split length: 652


  2%|▏         | 277/12315 [01:38<1:08:15,  2.94it/s]

inputs.shape: torch.Size([1, 4298]), input split length: 2152


  2%|▏         | 280/12315 [01:53<2:33:19,  1.31it/s]

inputs.shape: torch.Size([1, 3672]), input split length: 2546


  2%|▏         | 303/12315 [01:57<1:35:31,  2.10it/s]

inputs.shape: torch.Size([1, 1892]), input split length: 916


  3%|▎         | 355/12315 [02:12<1:10:43,  2.82it/s]

Validation index 377 skipped because input.shape: torch.Size([1, 5923]), input split length: 2601
inputs.shape: torch.Size([1, 1711]), input split length: 940


  3%|▎         | 379/12315 [02:27<1:26:48,  2.29it/s]

inputs.shape: torch.Size([1, 2070]), input split length: 1138


  3%|▎         | 382/12315 [02:38<1:58:55,  1.67it/s]

inputs.shape: torch.Size([1, 3766]), input split length: 2005


  3%|▎         | 401/12315 [02:40<1:29:12,  2.23it/s]

inputs.shape: torch.Size([1, 617]), input split length: 363


  3%|▎         | 429/12315 [02:53<1:30:07,  2.20it/s]

inputs.shape: torch.Size([1, 4048]), input split length: 1929


  4%|▎         | 439/12315 [02:56<1:22:47,  2.39it/s]

inputs.shape: torch.Size([1, 990]), input split length: 474


  4%|▎         | 442/12315 [02:56<1:20:25,  2.46it/s]

inputs.shape: torch.Size([1, 4582]), input split length: 2294


  4%|▎         | 450/12315 [03:11<2:19:24,  1.42it/s]

inputs.shape: torch.Size([1, 89]), input split length: 47


  4%|▍         | 492/12315 [03:24<1:31:51,  2.15it/s]

inputs.shape: torch.Size([1, 683]), input split length: 452


  4%|▍         | 518/12315 [03:25<1:01:29,  3.20it/s]

inputs.shape: torch.Size([1, 1561]), input split length: 886


  4%|▍         | 535/12315 [03:39<1:24:18,  2.33it/s]

inputs.shape: torch.Size([1, 794]), input split length: 538


  4%|▍         | 541/12315 [03:39<1:17:27,  2.53it/s]

inputs.shape: torch.Size([1, 4526]), input split length: 2351


  4%|▍         | 545/12315 [03:54<2:20:07,  1.40it/s]

inputs.shape: torch.Size([1, 5196]), input split length: 2448


  5%|▍         | 562/12315 [04:10<2:37:07,  1.25it/s]

inputs.shape: torch.Size([1, 1177]), input split length: 704


  5%|▍         | 577/12315 [04:11<1:50:39,  1.77it/s]

inputs.shape: torch.Size([1, 264]), input split length: 188


  5%|▍         | 587/12315 [04:12<1:27:25,  2.24it/s]

inputs.shape: torch.Size([1, 467]), input split length: 310


  5%|▌         | 635/12315 [04:25<1:05:54,  2.95it/s]

inputs.shape: torch.Size([1, 436]), input split length: 290


  5%|▌         | 648/12315 [04:25<54:51,  3.54it/s]  

inputs.shape: torch.Size([1, 2449]), input split length: 1609


  5%|▌         | 664/12315 [04:39<1:22:53,  2.34it/s]

inputs.shape: torch.Size([1, 2281]), input split length: 1267


  6%|▌         | 757/12315 [04:53<45:41,  4.22it/s]  

inputs.shape: torch.Size([1, 496]), input split length: 326


  6%|▌         | 765/12315 [04:53<43:12,  4.45it/s]

inputs.shape: torch.Size([1, 3631]), input split length: 2271


  6%|▋         | 779/12315 [04:55<40:58,  4.69it/s]

inputs.shape: torch.Size([1, 3806]), input split length: 1684


  6%|▋         | 792/12315 [05:10<1:11:17,  2.69it/s]

inputs.shape: torch.Size([1, 2729]), input split length: 1301


  7%|▋         | 811/12315 [05:24<1:29:30,  2.14it/s]

inputs.shape: torch.Size([1, 1804]), input split length: 1301


  7%|▋         | 881/12315 [05:25<38:28,  4.95it/s]  

inputs.shape: torch.Size([1, 3097]), input split length: 1752


  7%|▋         | 912/12315 [05:39<51:13,  3.71it/s]

inputs.shape: torch.Size([1, 3315]), input split length: 1958


  8%|▊         | 953/12315 [05:53<56:06,  3.38it/s]

inputs.shape: torch.Size([1, 312]), input split length: 213


  8%|▊         | 959/12315 [05:54<53:29,  3.54it/s]

inputs.shape: torch.Size([1, 1147]), input split length: 583


  8%|▊         | 967/12315 [06:07<1:22:38,  2.29it/s]

inputs.shape: torch.Size([1, 1382]), input split length: 859


  8%|▊         | 1010/12315 [06:10<50:53,  3.70it/s] 

inputs.shape: torch.Size([1, 4553]), input split length: 2324


  8%|▊         | 1014/12315 [06:25<1:28:30,  2.13it/s]

Validation index 1026 skipped because input.shape: torch.Size([1, 5909]), input split length: 2709
inputs.shape: torch.Size([1, 361]), input split length: 225


  8%|▊         | 1039/12315 [06:26<59:41,  3.15it/s]  

inputs.shape: torch.Size([1, 335]), input split length: 239


  9%|▊         | 1052/12315 [06:26<49:08,  3.82it/s]

inputs.shape: torch.Size([1, 4665]), input split length: 2772


  9%|▊         | 1053/12315 [06:41<1:49:20,  1.72it/s]

inputs.shape: torch.Size([1, 1862]), input split length: 1097


  9%|▊         | 1067/12315 [06:42<1:20:18,  2.33it/s]

inputs.shape: torch.Size([1, 3807]), input split length: 1419


  9%|▊         | 1073/12315 [06:44<1:18:40,  2.38it/s]

inputs.shape: torch.Size([1, 206]), input split length: 131


  9%|▊         | 1077/12315 [06:45<1:11:28,  2.62it/s]

inputs.shape: torch.Size([1, 3457]), input split length: 2078


  9%|▉         | 1113/12315 [06:47<33:21,  5.60it/s]  

inputs.shape: torch.Size([1, 1398]), input split length: 856


  9%|▉         | 1120/12315 [06:48<31:51,  5.86it/s]

Validation index 1135 skipped because input.shape: torch.Size([1, 6073]), input split length: 2633
inputs.shape: torch.Size([1, 243]), input split length: 152


  9%|▉         | 1142/12315 [06:48<20:51,  8.93it/s]

inputs.shape: torch.Size([1, 4236]), input split length: 2549


  9%|▉         | 1165/12315 [07:03<57:38,  3.22it/s]

inputs.shape: torch.Size([1, 2856]), input split length: 1635


 10%|▉         | 1193/12315 [07:17<1:11:16,  2.60it/s]

inputs.shape: torch.Size([1, 2951]), input split length: 1883


 10%|▉         | 1208/12315 [07:19<59:11,  3.13it/s]  

inputs.shape: torch.Size([1, 624]), input split length: 451


 10%|▉         | 1225/12315 [07:19<44:43,  4.13it/s]

inputs.shape: torch.Size([1, 4308]), input split length: 2181


 10%|▉         | 1226/12315 [07:34<1:44:37,  1.77it/s]

inputs.shape: torch.Size([1, 2031]), input split length: 1001


 10%|█         | 1268/12315 [07:35<47:17,  3.89it/s]  

inputs.shape: torch.Size([1, 3258]), input split length: 1901


 10%|█         | 1280/12315 [07:49<1:19:13,  2.32it/s]

inputs.shape: torch.Size([1, 4475]), input split length: 1946


 10%|█         | 1281/12315 [08:04<2:17:46,  1.33it/s]

Validation index 1288 skipped because input.shape: torch.Size([1, 8594]), input split length: 4072
inputs.shape: torch.Size([1, 574]), input split length: 373


 11%|█         | 1309/12315 [08:05<1:14:52,  2.45it/s]

inputs.shape: torch.Size([1, 2442]), input split length: 1546


 11%|█         | 1375/12315 [08:18<52:07,  3.50it/s]  

inputs.shape: torch.Size([1, 4576]), input split length: 1992


 11%|█▏        | 1387/12315 [08:33<1:16:02,  2.40it/s]

inputs.shape: torch.Size([1, 464]), input split length: 301


 11%|█▏        | 1388/12315 [08:46<1:52:34,  1.62it/s]

inputs.shape: torch.Size([1, 732]), input split length: 479


 11%|█▏        | 1396/12315 [08:47<1:37:23,  1.87it/s]

inputs.shape: torch.Size([1, 2535]), input split length: 1486


 11%|█▏        | 1416/12315 [08:48<1:07:31,  2.69it/s]

inputs.shape: torch.Size([1, 3531]), input split length: 2276


 12%|█▏        | 1442/12315 [08:51<46:16,  3.92it/s]  

inputs.shape: torch.Size([1, 2209]), input split length: 1240


 12%|█▏        | 1464/12315 [08:52<34:25,  5.25it/s]

inputs.shape: torch.Size([1, 3596]), input split length: 2236


 12%|█▏        | 1490/12315 [08:56<31:36,  5.71it/s]

inputs.shape: torch.Size([1, 4348]), input split length: 2659


 12%|█▏        | 1496/12315 [09:10<1:14:40,  2.41it/s]

inputs.shape: torch.Size([1, 867]), input split length: 505


 12%|█▏        | 1525/12315 [09:11<45:01,  3.99it/s]  

inputs.shape: torch.Size([1, 862]), input split length: 455


 13%|█▎        | 1562/12315 [09:12<26:51,  6.67it/s]

Validation index 1581 skipped because input.shape: torch.Size([1, 9243]), input split length: 3963
inputs.shape: torch.Size([1, 1561]), input split length: 923


 13%|█▎        | 1591/12315 [09:13<19:59,  8.94it/s]

inputs.shape: torch.Size([1, 399]), input split length: 232


 13%|█▎        | 1595/12315 [09:13<20:17,  8.80it/s]

inputs.shape: torch.Size([1, 719]), input split length: 492


 13%|█▎        | 1603/12315 [09:14<19:26,  9.18it/s]

inputs.shape: torch.Size([1, 5031]), input split length: 2071


 13%|█▎        | 1638/12315 [09:29<47:49,  3.72it/s]

inputs.shape: torch.Size([1, 3126]), input split length: 1921


 13%|█▎        | 1648/12315 [09:33<51:13,  3.47it/s]

inputs.shape: torch.Size([1, 579]), input split length: 325


 13%|█▎        | 1660/12315 [09:34<41:51,  4.24it/s]

inputs.shape: torch.Size([1, 3456]), input split length: 1959


 14%|█▎        | 1663/12315 [09:48<1:39:49,  1.78it/s]

inputs.shape: torch.Size([1, 3847]), input split length: 1824


 14%|█▎        | 1672/12315 [10:02<2:23:44,  1.23it/s]

inputs.shape: torch.Size([1, 534]), input split length: 281


 14%|█▍        | 1746/12315 [10:15<1:00:23,  2.92it/s]

inputs.shape: torch.Size([1, 5142]), input split length: 3025


 14%|█▍        | 1752/12315 [10:31<1:30:56,  1.94it/s]

inputs.shape: torch.Size([1, 473]), input split length: 302


 14%|█▍        | 1758/12315 [10:31<1:23:23,  2.11it/s]

inputs.shape: torch.Size([1, 450]), input split length: 277


 14%|█▍        | 1763/12315 [10:32<1:16:33,  2.30it/s]

inputs.shape: torch.Size([1, 3400]), input split length: 1972


 14%|█▍        | 1776/12315 [10:46<1:49:02,  1.61it/s]

inputs.shape: torch.Size([1, 2624]), input split length: 1084


 14%|█▍        | 1779/12315 [10:47<1:46:34,  1.65it/s]

inputs.shape: torch.Size([1, 2512]), input split length: 981


 14%|█▍        | 1782/12315 [11:01<3:02:19,  1.04s/it]

inputs.shape: torch.Size([1, 3677]), input split length: 2014


 15%|█▍        | 1795/12315 [11:15<3:06:59,  1.07s/it]

inputs.shape: torch.Size([1, 3702]), input split length: 1558


 15%|█▍        | 1803/12315 [11:30<3:40:37,  1.26s/it]

inputs.shape: torch.Size([1, 4377]), input split length: 2160


 15%|█▍        | 1817/12315 [11:45<3:26:15,  1.18s/it]

inputs.shape: torch.Size([1, 348]), input split length: 205


 15%|█▌        | 1855/12315 [11:45<1:22:37,  2.11it/s]

Validation index 1855 skipped because input.shape: torch.Size([1, 5697]), input split length: 2751
inputs.shape: torch.Size([1, 4104]), input split length: 2059


 15%|█▌        | 1864/12315 [12:00<1:56:40,  1.49it/s]

inputs.shape: torch.Size([1, 375]), input split length: 244


 15%|█▌        | 1867/12315 [12:00<1:50:16,  1.58it/s]

inputs.shape: torch.Size([1, 1815]), input split length: 1021


 15%|█▌        | 1889/12315 [12:01<1:05:15,  2.66it/s]

inputs.shape: torch.Size([1, 751]), input split length: 229


 15%|█▌        | 1898/12315 [12:15<1:43:47,  1.67it/s]

Validation index 1901 skipped because input.shape: torch.Size([1, 5632]), input split length: 2434
inputs.shape: torch.Size([1, 4507]), input split length: 1931


 16%|█▌        | 1996/12315 [12:30<44:44,  3.84it/s]  

inputs.shape: torch.Size([1, 490]), input split length: 333


 16%|█▋        | 2002/12315 [12:30<42:56,  4.00it/s]

inputs.shape: torch.Size([1, 5213]), input split length: 2693


 16%|█▋        | 2006/12315 [12:46<1:18:47,  2.18it/s]

inputs.shape: torch.Size([1, 2912]), input split length: 2046


 17%|█▋        | 2052/12315 [13:00<1:05:37,  2.61it/s]

inputs.shape: torch.Size([1, 2538]), input split length: 948


 17%|█▋        | 2054/12315 [13:14<1:37:47,  1.75it/s]

inputs.shape: torch.Size([1, 2681]), input split length: 1420


 17%|█▋        | 2060/12315 [13:28<2:10:46,  1.31it/s]

Validation index 2076 skipped because input.shape: torch.Size([1, 6046]), input split length: 3026
inputs.shape: torch.Size([1, 2175]), input split length: 1234


 17%|█▋        | 2080/12315 [13:29<1:29:10,  1.91it/s]

inputs.shape: torch.Size([1, 4702]), input split length: 2329


 17%|█▋        | 2119/12315 [13:44<1:17:29,  2.19it/s]

inputs.shape: torch.Size([1, 3402]), input split length: 1756


 17%|█▋        | 2130/12315 [13:58<1:40:44,  1.69it/s]

inputs.shape: torch.Size([1, 536]), input split length: 413


 17%|█▋        | 2149/12315 [13:59<1:12:58,  2.32it/s]

inputs.shape: torch.Size([1, 5430]), input split length: 2146


 17%|█▋        | 2155/12315 [14:16<1:59:33,  1.42it/s]

inputs.shape: torch.Size([1, 3655]), input split length: 1959


 18%|█▊        | 2214/12315 [14:30<1:10:48,  2.38it/s]

inputs.shape: torch.Size([1, 4528]), input split length: 2683


 18%|█▊        | 2221/12315 [14:45<1:37:53,  1.72it/s]

inputs.shape: torch.Size([1, 2938]), input split length: 1697


 18%|█▊        | 2250/12315 [14:59<1:31:27,  1.83it/s]

inputs.shape: torch.Size([1, 3064]), input split length: 1603


 18%|█▊        | 2257/12315 [15:01<1:26:00,  1.95it/s]

inputs.shape: torch.Size([1, 3986]), input split length: 2043


 18%|█▊        | 2264/12315 [15:15<2:01:36,  1.38it/s]

inputs.shape: torch.Size([1, 2866]), input split length: 1443


 19%|█▊        | 2301/12315 [15:29<1:31:34,  1.82it/s]

inputs.shape: torch.Size([1, 2585]), input split length: 1583


 19%|█▉        | 2310/12315 [15:33<1:28:34,  1.88it/s]

inputs.shape: torch.Size([1, 2607]), input split length: 1259


 19%|█▉        | 2321/12315 [15:47<1:53:28,  1.47it/s]

inputs.shape: torch.Size([1, 1101]), input split length: 643


 19%|█▉        | 2341/12315 [15:48<1:15:49,  2.19it/s]

inputs.shape: torch.Size([1, 2284]), input split length: 1050


 19%|█▉        | 2343/12315 [15:49<1:17:00,  2.16it/s]

inputs.shape: torch.Size([1, 554]), input split length: 389


 19%|█▉        | 2369/12315 [15:49<42:30,  3.90it/s]  

inputs.shape: torch.Size([1, 4328]), input split length: 2077


 19%|█▉        | 2370/12315 [16:04<1:42:35,  1.62it/s]

inputs.shape: torch.Size([1, 1749]), input split length: 806


 19%|█▉        | 2378/12315 [16:05<1:24:17,  1.96it/s]

inputs.shape: torch.Size([1, 1132]), input split length: 748


 19%|█▉        | 2387/12315 [16:18<2:06:28,  1.31it/s]

inputs.shape: torch.Size([1, 4529]), input split length: 2330


 20%|█▉        | 2405/12315 [16:33<2:11:06,  1.26it/s]

inputs.shape: torch.Size([1, 1473]), input split length: 684


 20%|█▉        | 2409/12315 [16:34<1:59:49,  1.38it/s]

inputs.shape: torch.Size([1, 3253]), input split length: 1696


 20%|█▉        | 2457/12315 [16:49<1:11:44,  2.29it/s]

inputs.shape: torch.Size([1, 679]), input split length: 283


 20%|█▉        | 2459/12315 [16:49<1:11:01,  2.31it/s]

inputs.shape: torch.Size([1, 2098]), input split length: 1178


 20%|██        | 2488/12315 [16:53<47:58,  3.41it/s]  

inputs.shape: torch.Size([1, 402]), input split length: 278


 20%|██        | 2495/12315 [16:53<43:20,  3.78it/s]

inputs.shape: torch.Size([1, 3954]), input split length: 1971


 20%|██        | 2497/12315 [16:56<50:50,  3.22it/s]

inputs.shape: torch.Size([1, 317]), input split length: 154


 20%|██        | 2498/12315 [16:56<52:10,  3.14it/s]

inputs.shape: torch.Size([1, 1949]), input split length: 862


 20%|██        | 2506/12315 [16:57<43:35,  3.75it/s]

inputs.shape: torch.Size([1, 2800]), input split length: 1594


 20%|██        | 2520/12315 [17:11<1:32:52,  1.76it/s]

inputs.shape: torch.Size([1, 3303]), input split length: 1763


 21%|██        | 2525/12315 [17:25<2:37:44,  1.03it/s]

inputs.shape: torch.Size([1, 2916]), input split length: 1544


 21%|██        | 2564/12315 [17:27<57:15,  2.84it/s]  

inputs.shape: torch.Size([1, 849]), input split length: 543


 21%|██        | 2594/12315 [17:27<34:37,  4.68it/s]

inputs.shape: torch.Size([1, 825]), input split length: 497


 21%|██        | 2603/12315 [17:28<31:09,  5.20it/s]

inputs.shape: torch.Size([1, 2003]), input split length: 1188


 21%|██▏       | 2622/12315 [17:32<30:44,  5.25it/s]

inputs.shape: torch.Size([1, 2181]), input split length: 930


 22%|██▏       | 2670/12315 [17:33<16:29,  9.75it/s]

inputs.shape: torch.Size([1, 328]), input split length: 187


 22%|██▏       | 2681/12315 [17:33<15:11, 10.57it/s]

inputs.shape: torch.Size([1, 3210]), input split length: 1826


 22%|██▏       | 2685/12315 [17:47<54:33,  2.94it/s]

inputs.shape: torch.Size([1, 3551]), input split length: 2191


 22%|██▏       | 2693/12315 [17:49<52:20,  3.06it/s]

Validation index 2696 skipped because input.shape: torch.Size([1, 5810]), input split length: 2728
inputs.shape: torch.Size([1, 608]), input split length: 436


 22%|██▏       | 2705/12315 [17:50<40:29,  3.96it/s]

inputs.shape: torch.Size([1, 373]), input split length: 267


 22%|██▏       | 2712/12315 [18:03<1:26:58,  1.84it/s]

inputs.shape: torch.Size([1, 3739]), input split length: 2036


 22%|██▏       | 2714/12315 [18:17<2:40:31,  1.00s/it]

inputs.shape: torch.Size([1, 3550]), input split length: 1946


 22%|██▏       | 2725/12315 [18:32<2:56:53,  1.11s/it]

inputs.shape: torch.Size([1, 3264]), input split length: 1881


 22%|██▏       | 2755/12315 [18:46<1:57:31,  1.36it/s]

inputs.shape: torch.Size([1, 3875]), input split length: 1714


 22%|██▏       | 2760/12315 [19:00<2:38:58,  1.00it/s]

inputs.shape: torch.Size([1, 2587]), input split length: 1340


 23%|██▎       | 2777/12315 [19:02<1:44:47,  1.52it/s]

inputs.shape: torch.Size([1, 2283]), input split length: 1289


 23%|██▎       | 2802/12315 [19:15<1:36:50,  1.64it/s]

inputs.shape: torch.Size([1, 2190]), input split length: 1165


 23%|██▎       | 2815/12315 [19:17<1:16:35,  2.07it/s]

inputs.shape: torch.Size([1, 328]), input split length: 166


 23%|██▎       | 2820/12315 [19:17<1:09:27,  2.28it/s]

inputs.shape: torch.Size([1, 281]), input split length: 166


 23%|██▎       | 2834/12315 [19:18<48:58,  3.23it/s]  

inputs.shape: torch.Size([1, 2799]), input split length: 1756


 23%|██▎       | 2841/12315 [19:19<45:57,  3.44it/s]

inputs.shape: torch.Size([1, 4372]), input split length: 2301


 23%|██▎       | 2855/12315 [19:34<1:28:01,  1.79it/s]

inputs.shape: torch.Size([1, 288]), input split length: 199


 23%|██▎       | 2872/12315 [19:34<56:39,  2.78it/s]  

inputs.shape: torch.Size([1, 403]), input split length: 295


 23%|██▎       | 2879/12315 [19:35<48:37,  3.23it/s]

inputs.shape: torch.Size([1, 2210]), input split length: 1021


 23%|██▎       | 2883/12315 [19:36<48:25,  3.25it/s]

inputs.shape: torch.Size([1, 2524]), input split length: 1671


 23%|██▎       | 2894/12315 [19:50<1:37:38,  1.61it/s]

inputs.shape: torch.Size([1, 4957]), input split length: 2349


 24%|██▍       | 2926/12315 [20:05<1:24:10,  1.86it/s]

inputs.shape: torch.Size([1, 943]), input split length: 568


 24%|██▍       | 2934/12315 [20:18<1:53:53,  1.37it/s]

Validation index 2939 skipped because input.shape: torch.Size([1, 6531]), input split length: 1952
inputs.shape: torch.Size([1, 607]), input split length: 300


 24%|██▍       | 2945/12315 [20:19<1:27:10,  1.79it/s]

inputs.shape: torch.Size([1, 2329]), input split length: 1260


 24%|██▍       | 2967/12315 [20:20<54:12,  2.87it/s]  

inputs.shape: torch.Size([1, 3260]), input split length: 1553


 24%|██▍       | 2972/12315 [20:34<1:40:27,  1.55it/s]

inputs.shape: torch.Size([1, 4396]), input split length: 2319


 24%|██▍       | 2981/12315 [20:49<2:16:52,  1.14it/s]

inputs.shape: torch.Size([1, 3523]), input split length: 1943


 24%|██▍       | 2988/12315 [21:03<2:53:55,  1.12s/it]

inputs.shape: torch.Size([1, 3498]), input split length: 1961


 24%|██▍       | 3014/12315 [21:05<1:28:53,  1.74it/s]

inputs.shape: torch.Size([1, 1527]), input split length: 827


 25%|██▍       | 3025/12315 [21:19<1:52:52,  1.37it/s]

inputs.shape: torch.Size([1, 1123]), input split length: 766


 25%|██▍       | 3041/12315 [21:19<1:17:35,  1.99it/s]

inputs.shape: torch.Size([1, 3036]), input split length: 1690


 25%|██▍       | 3043/12315 [21:34<2:14:05,  1.15it/s]

inputs.shape: torch.Size([1, 3030]), input split length: 1664


 25%|██▍       | 3062/12315 [21:37<1:29:52,  1.72it/s]

inputs.shape: torch.Size([1, 789]), input split length: 507


 25%|██▍       | 3067/12315 [21:38<1:20:09,  1.92it/s]

inputs.shape: torch.Size([1, 1301]), input split length: 651


 25%|██▌       | 3132/12315 [21:39<23:36,  6.48it/s]  

inputs.shape: torch.Size([1, 3833]), input split length: 2335


 26%|██▌       | 3141/12315 [21:53<51:20,  2.98it/s]

inputs.shape: torch.Size([1, 4102]), input split length: 2133


 26%|██▌       | 3173/12315 [22:08<58:25,  2.61it/s]

Validation index 3180 skipped because input.shape: torch.Size([1, 12204]), input split length: 3607
inputs.shape: torch.Size([1, 210]), input split length: 132


 26%|██▌       | 3200/12315 [22:09<40:41,  3.73it/s]

inputs.shape: torch.Size([1, 2935]), input split length: 1692


 26%|██▌       | 3204/12315 [22:13<47:17,  3.21it/s]

inputs.shape: torch.Size([1, 3405]), input split length: 1817


 26%|██▌       | 3207/12315 [22:27<1:29:55,  1.69it/s]

inputs.shape: torch.Size([1, 2854]), input split length: 1539


 26%|██▌       | 3220/12315 [22:41<1:50:36,  1.37it/s]

inputs.shape: torch.Size([1, 4261]), input split length: 2772


 26%|██▋       | 3239/12315 [22:56<1:53:02,  1.34it/s]

inputs.shape: torch.Size([1, 5053]), input split length: 2882


 26%|██▋       | 3243/12315 [23:11<2:40:14,  1.06s/it]

inputs.shape: torch.Size([1, 2007]), input split length: 1415


 27%|██▋       | 3269/12315 [23:25<2:00:41,  1.25it/s]

inputs.shape: torch.Size([1, 1939]), input split length: 1060


 27%|██▋       | 3275/12315 [23:26<1:47:53,  1.40it/s]

inputs.shape: torch.Size([1, 512]), input split length: 237


 27%|██▋       | 3315/12315 [23:26<47:47,  3.14it/s]  

inputs.shape: torch.Size([1, 2965]), input split length: 1622


 27%|██▋       | 3318/12315 [23:40<1:25:13,  1.76it/s]

inputs.shape: torch.Size([1, 210]), input split length: 122


 27%|██▋       | 3321/12315 [23:41<1:20:38,  1.86it/s]

inputs.shape: torch.Size([1, 4167]), input split length: 2264


 27%|██▋       | 3336/12315 [23:56<1:43:39,  1.44it/s]

Validation index 3359 skipped because input.shape: torch.Size([1, 11721]), input split length: 4925
inputs.shape: torch.Size([1, 571]), input split length: 421


 28%|██▊       | 3388/12315 [23:56<38:48,  3.83it/s]  

inputs.shape: torch.Size([1, 691]), input split length: 516


 28%|██▊       | 3440/12315 [24:09<38:01,  3.89it/s]

inputs.shape: torch.Size([1, 1389]), input split length: 846


 28%|██▊       | 3450/12315 [24:23<56:21,  2.62it/s]

inputs.shape: torch.Size([1, 811]), input split length: 380


 29%|██▉       | 3564/12315 [24:23<19:53,  7.33it/s]

inputs.shape: torch.Size([1, 520]), input split length: 299


 29%|██▉       | 3572/12315 [24:24<19:17,  7.55it/s]

inputs.shape: torch.Size([1, 92]), input split length: 55


 29%|██▉       | 3578/12315 [24:25<18:58,  7.67it/s]

Validation index 3577 skipped because input.shape: torch.Size([1, 7118]), input split length: 2334
inputs.shape: torch.Size([1, 465]), input split length: 265


 29%|██▉       | 3612/12315 [24:25<13:25, 10.81it/s]

inputs.shape: torch.Size([1, 514]), input split length: 362


 29%|██▉       | 3623/12315 [24:26<12:37, 11.47it/s]

inputs.shape: torch.Size([1, 521]), input split length: 353


 29%|██▉       | 3630/12315 [24:26<12:32, 11.54it/s]

inputs.shape: torch.Size([1, 2551]), input split length: 1410


 30%|██▉       | 3636/12315 [24:28<15:24,  9.38it/s]

inputs.shape: torch.Size([1, 257]), input split length: 171


 30%|██▉       | 3641/12315 [24:30<22:38,  6.39it/s]

inputs.shape: torch.Size([1, 5317]), input split length: 2967


 30%|██▉       | 3645/12315 [24:46<1:30:35,  1.59it/s]

Validation index 3645 skipped because input.shape: torch.Size([1, 6846]), input split length: 3203
inputs.shape: torch.Size([1, 2838]), input split length: 1711


 30%|██▉       | 3653/12315 [24:48<1:14:06,  1.95it/s]

inputs.shape: torch.Size([1, 953]), input split length: 697


 30%|██▉       | 3662/12315 [24:49<55:41,  2.59it/s]  

inputs.shape: torch.Size([1, 834]), input split length: 442


 30%|██▉       | 3671/12315 [24:50<46:52,  3.07it/s]

inputs.shape: torch.Size([1, 431]), input split length: 281


 30%|███       | 3698/12315 [24:51<22:17,  6.44it/s]

inputs.shape: torch.Size([1, 2132]), input split length: 1122


 30%|███       | 3700/12315 [25:04<1:15:44,  1.90it/s]

inputs.shape: torch.Size([1, 1850]), input split length: 1167


 31%|███       | 3760/12315 [25:18<44:50,  3.18it/s]  

inputs.shape: torch.Size([1, 5092]), input split length: 2049


 31%|███       | 3775/12315 [25:33<1:05:04,  2.19it/s]

inputs.shape: torch.Size([1, 1125]), input split length: 609


 31%|███       | 3782/12315 [25:47<1:29:26,  1.59it/s]

inputs.shape: torch.Size([1, 422]), input split length: 244


 31%|███       | 3788/12315 [25:47<1:19:47,  1.78it/s]

inputs.shape: torch.Size([1, 1831]), input split length: 903


 31%|███       | 3791/12315 [26:01<2:05:55,  1.13it/s]

inputs.shape: torch.Size([1, 832]), input split length: 579


 31%|███       | 3796/12315 [26:01<1:48:15,  1.31it/s]

inputs.shape: torch.Size([1, 889]), input split length: 668


 31%|███       | 3803/12315 [26:02<1:24:47,  1.67it/s]

inputs.shape: torch.Size([1, 1384]), input split length: 521


 31%|███       | 3808/12315 [26:03<1:12:36,  1.95it/s]

Validation index 3808 skipped because input.shape: torch.Size([1, 6402]), input split length: 3481
inputs.shape: torch.Size([1, 3543]), input split length: 2101


 31%|███       | 3811/12315 [26:17<2:41:47,  1.14s/it]

inputs.shape: torch.Size([1, 3314]), input split length: 1724


 31%|███       | 3823/12315 [26:31<2:44:01,  1.16s/it]

inputs.shape: torch.Size([1, 4620]), input split length: 1712


 31%|███       | 3833/12315 [26:47<3:02:15,  1.29s/it]

Validation index 3849 skipped because input.shape: torch.Size([1, 11398]), input split length: 3922
inputs.shape: torch.Size([1, 2235]), input split length: 1302


 31%|███▏      | 3871/12315 [26:48<1:05:26,  2.15it/s]

inputs.shape: torch.Size([1, 4849]), input split length: 2553


 32%|███▏      | 3885/12315 [27:03<1:27:54,  1.60it/s]

inputs.shape: torch.Size([1, 2395]), input split length: 935


 32%|███▏      | 3891/12315 [27:17<1:59:57,  1.17it/s]

inputs.shape: torch.Size([1, 1763]), input split length: 763


 32%|███▏      | 3977/12315 [27:18<32:36,  4.26it/s]  

inputs.shape: torch.Size([1, 3921]), input split length: 2220


 32%|███▏      | 3991/12315 [27:20<31:12,  4.45it/s]

inputs.shape: torch.Size([1, 1661]), input split length: 947


 32%|███▏      | 4002/12315 [27:24<32:43,  4.23it/s]

Validation index 4006 skipped because input.shape: torch.Size([1, 6018]), input split length: 2440
inputs.shape: torch.Size([1, 636]), input split length: 408


 33%|███▎      | 4012/12315 [27:37<55:31,  2.49it/s]

inputs.shape: torch.Size([1, 672]), input split length: 363


 33%|███▎      | 4040/12315 [27:38<35:26,  3.89it/s]

inputs.shape: torch.Size([1, 4267]), input split length: 2441


 33%|███▎      | 4047/12315 [27:52<1:06:11,  2.08it/s]

inputs.shape: torch.Size([1, 447]), input split length: 265


 33%|███▎      | 4048/12315 [27:53<1:06:28,  2.07it/s]

inputs.shape: torch.Size([1, 2571]), input split length: 1308


 33%|███▎      | 4052/12315 [28:06<1:51:19,  1.24it/s]

Validation index 4060 skipped because input.shape: torch.Size([1, 5941]), input split length: 2815
inputs.shape: torch.Size([1, 498]), input split length: 293


 33%|███▎      | 4098/12315 [28:19<1:02:59,  2.17it/s]

inputs.shape: torch.Size([1, 2152]), input split length: 1025


 33%|███▎      | 4104/12315 [28:20<58:55,  2.32it/s]  

inputs.shape: torch.Size([1, 3739]), input split length: 2236


 33%|███▎      | 4113/12315 [28:34<1:29:44,  1.52it/s]

inputs.shape: torch.Size([1, 686]), input split length: 493


 34%|███▎      | 4152/12315 [28:35<42:09,  3.23it/s]  

inputs.shape: torch.Size([1, 473]), input split length: 273


 34%|███▎      | 4156/12315 [28:36<40:27,  3.36it/s]

inputs.shape: torch.Size([1, 649]), input split length: 445


 34%|███▍      | 4159/12315 [28:36<39:36,  3.43it/s]

inputs.shape: torch.Size([1, 3637]), input split length: 2260


 34%|███▍      | 4175/12315 [28:51<1:09:33,  1.95it/s]

inputs.shape: torch.Size([1, 3320]), input split length: 1856


 34%|███▍      | 4194/12315 [29:05<1:21:17,  1.67it/s]

inputs.shape: torch.Size([1, 4097]), input split length: 1943


 34%|███▍      | 4198/12315 [29:20<2:03:50,  1.09it/s]

inputs.shape: torch.Size([1, 3209]), input split length: 1269


 34%|███▍      | 4226/12315 [29:21<1:05:19,  2.06it/s]

inputs.shape: torch.Size([1, 348]), input split length: 261


 34%|███▍      | 4247/12315 [29:22<43:30,  3.09it/s]  

inputs.shape: torch.Size([1, 1023]), input split length: 641


 35%|███▍      | 4257/12315 [29:23<36:59,  3.63it/s]

inputs.shape: torch.Size([1, 269]), input split length: 166


 35%|███▍      | 4263/12315 [29:23<33:24,  4.02it/s]

inputs.shape: torch.Size([1, 343]), input split length: 196


 35%|███▍      | 4268/12315 [29:24<30:39,  4.37it/s]

inputs.shape: torch.Size([1, 1462]), input split length: 1047


 35%|███▍      | 4270/12315 [29:25<32:55,  4.07it/s]

inputs.shape: torch.Size([1, 1976]), input split length: 944


 35%|███▍      | 4272/12315 [29:26<36:49,  3.64it/s]

inputs.shape: torch.Size([1, 808]), input split length: 388


 35%|███▍      | 4284/12315 [29:39<1:27:08,  1.54it/s]

inputs.shape: torch.Size([1, 4833]), input split length: 2688


 35%|███▍      | 4305/12315 [29:54<1:32:04,  1.45it/s]

inputs.shape: torch.Size([1, 1383]), input split length: 665


 35%|███▍      | 4310/12315 [30:08<2:12:58,  1.00it/s]

inputs.shape: torch.Size([1, 2787]), input split length: 1628


 35%|███▌      | 4319/12315 [30:22<2:33:58,  1.16s/it]

inputs.shape: torch.Size([1, 2776]), input split length: 1668


 35%|███▌      | 4339/12315 [30:36<2:04:34,  1.07it/s]

inputs.shape: torch.Size([1, 4133]), input split length: 2229


 35%|███▌      | 4343/12315 [30:50<2:47:58,  1.26s/it]

inputs.shape: torch.Size([1, 4592]), input split length: 2407


 35%|███▌      | 4352/12315 [31:05<3:02:12,  1.37s/it]

inputs.shape: torch.Size([1, 116]), input split length: 79


 35%|███▌      | 4358/12315 [31:06<2:26:12,  1.10s/it]

inputs.shape: torch.Size([1, 3567]), input split length: 1979


 35%|███▌      | 4363/12315 [31:20<3:12:27,  1.45s/it]

inputs.shape: torch.Size([1, 5242]), input split length: 2518


 36%|███▌      | 4372/12315 [31:36<3:24:37,  1.55s/it]

inputs.shape: torch.Size([1, 1401]), input split length: 696


 36%|███▌      | 4388/12315 [31:36<1:53:11,  1.17it/s]

inputs.shape: torch.Size([1, 3311]), input split length: 2077


 36%|███▌      | 4390/12315 [31:38<1:54:27,  1.15it/s]

Validation index 4394 skipped because input.shape: torch.Size([1, 8735]), input split length: 4389
inputs.shape: torch.Size([1, 661]), input split length: 482


 36%|███▌      | 4415/12315 [31:52<1:28:12,  1.49it/s]

Validation index 4416 skipped because input.shape: torch.Size([1, 8008]), input split length: 2916
inputs.shape: torch.Size([1, 4148]), input split length: 1998


 36%|███▌      | 4427/12315 [32:06<1:48:42,  1.21it/s]

inputs.shape: torch.Size([1, 342]), input split length: 238


 36%|███▌      | 4432/12315 [32:07<1:35:00,  1.38it/s]

inputs.shape: torch.Size([1, 3137]), input split length: 1979


 36%|███▌      | 4436/12315 [32:08<1:29:15,  1.47it/s]

inputs.shape: torch.Size([1, 5219]), input split length: 2466


 36%|███▌      | 4448/12315 [32:24<1:59:40,  1.10it/s]

inputs.shape: torch.Size([1, 626]), input split length: 400


 36%|███▌      | 4460/12315 [32:25<1:19:53,  1.64it/s]

inputs.shape: torch.Size([1, 4494]), input split length: 2771


 36%|███▋      | 4466/12315 [32:39<2:08:53,  1.01it/s]

inputs.shape: torch.Size([1, 4078]), input split length: 1673


 37%|███▋      | 4499/12315 [32:54<1:25:31,  1.52it/s]

inputs.shape: torch.Size([1, 3422]), input split length: 1352


 37%|███▋      | 4500/12315 [32:58<1:36:56,  1.34it/s]

inputs.shape: torch.Size([1, 1252]), input split length: 970


 37%|███▋      | 4501/12315 [32:59<1:37:10,  1.34it/s]

inputs.shape: torch.Size([1, 2096]), input split length: 1140


 37%|███▋      | 4506/12315 [33:13<2:29:30,  1.15s/it]

inputs.shape: torch.Size([1, 4689]), input split length: 2760


 37%|███▋      | 4507/12315 [33:28<4:08:39,  1.91s/it]

inputs.shape: torch.Size([1, 3750]), input split length: 2192


 37%|███▋      | 4521/12315 [33:42<3:08:28,  1.45s/it]

inputs.shape: torch.Size([1, 419]), input split length: 261


 37%|███▋      | 4557/12315 [33:43<1:05:37,  1.97it/s]

inputs.shape: torch.Size([1, 1194]), input split length: 469


 37%|███▋      | 4563/12315 [33:44<58:59,  2.19it/s]  

inputs.shape: torch.Size([1, 309]), input split length: 136


 37%|███▋      | 4574/12315 [33:44<45:08,  2.86it/s]

inputs.shape: torch.Size([1, 2637]), input split length: 1489


 37%|███▋      | 4585/12315 [33:48<44:41,  2.88it/s]

inputs.shape: torch.Size([1, 243]), input split length: 134


 37%|███▋      | 4589/12315 [33:48<41:09,  3.13it/s]

inputs.shape: torch.Size([1, 1593]), input split length: 867


 37%|███▋      | 4602/12315 [34:00<1:07:18,  1.91it/s]

inputs.shape: torch.Size([1, 485]), input split length: 353


 38%|███▊      | 4628/12315 [34:00<34:18,  3.73it/s]  

inputs.shape: torch.Size([1, 2664]), input split length: 1471


 38%|███▊      | 4633/12315 [34:02<34:26,  3.72it/s]

inputs.shape: torch.Size([1, 1359]), input split length: 859


 38%|███▊      | 4635/12315 [34:03<35:50,  3.57it/s]

inputs.shape: torch.Size([1, 1755]), input split length: 838


 38%|███▊      | 4652/12315 [34:04<23:14,  5.50it/s]

inputs.shape: torch.Size([1, 4029]), input split length: 1826


 38%|███▊      | 4659/12315 [34:18<1:13:43,  1.73it/s]

inputs.shape: torch.Size([1, 3414]), input split length: 2165


 38%|███▊      | 4729/12315 [34:30<33:40,  3.75it/s]  

inputs.shape: torch.Size([1, 3365]), input split length: 1526


 39%|███▊      | 4754/12315 [34:31<26:49,  4.70it/s]

inputs.shape: torch.Size([1, 4981]), input split length: 1933


 39%|███▊      | 4761/12315 [34:47<51:29,  2.45it/s]

inputs.shape: torch.Size([1, 1445]), input split length: 527


 39%|███▉      | 4793/12315 [34:48<32:29,  3.86it/s]

inputs.shape: torch.Size([1, 2016]), input split length: 1353


 39%|███▉      | 4807/12315 [34:49<27:59,  4.47it/s]

inputs.shape: torch.Size([1, 468]), input split length: 321


 39%|███▉      | 4809/12315 [34:49<28:16,  4.42it/s]

inputs.shape: torch.Size([1, 298]), input split length: 209


 39%|███▉      | 4811/12315 [34:50<28:32,  4.38it/s]

inputs.shape: torch.Size([1, 708]), input split length: 415


 39%|███▉      | 4843/12315 [34:50<14:18,  8.70it/s]

inputs.shape: torch.Size([1, 394]), input split length: 279


 39%|███▉      | 4856/12315 [34:51<12:11, 10.19it/s]

inputs.shape: torch.Size([1, 3566]), input split length: 1936


 39%|███▉      | 4858/12315 [35:05<56:04,  2.22it/s]

inputs.shape: torch.Size([1, 4583]), input split length: 2622


 40%|███▉      | 4877/12315 [35:20<1:12:48,  1.70it/s]

Validation index 4892 skipped because input.shape: torch.Size([1, 6474]), input split length: 2919
inputs.shape: torch.Size([1, 347]), input split length: 217


 40%|████      | 4927/12315 [35:21<29:30,  4.17it/s]  

inputs.shape: torch.Size([1, 575]), input split length: 324


 40%|████      | 4934/12315 [35:34<50:47,  2.42it/s]

inputs.shape: torch.Size([1, 1648]), input split length: 836


 40%|████      | 4941/12315 [35:35<46:11,  2.66it/s]

inputs.shape: torch.Size([1, 5363]), input split length: 2636


 40%|████      | 4946/12315 [35:50<1:26:52,  1.41it/s]

inputs.shape: torch.Size([1, 4630]), input split length: 2091


 40%|████      | 4963/12315 [36:05<1:34:32,  1.30it/s]

inputs.shape: torch.Size([1, 2108]), input split length: 1073


 40%|████      | 4983/12315 [36:19<1:29:50,  1.36it/s]

inputs.shape: torch.Size([1, 4602]), input split length: 2238


 40%|████      | 4986/12315 [36:34<2:08:52,  1.06s/it]

inputs.shape: torch.Size([1, 1575]), input split length: 736


 41%|████      | 5053/12315 [36:35<38:36,  3.13it/s]  

inputs.shape: torch.Size([1, 1473]), input split length: 859


 41%|████      | 5068/12315 [36:48<51:09,  2.36it/s]

inputs.shape: torch.Size([1, 3226]), input split length: 1701


 41%|████      | 5072/12315 [37:02<1:16:04,  1.59it/s]

Validation index 5076 skipped because input.shape: torch.Size([1, 8836]), input split length: 3817
inputs.shape: torch.Size([1, 3528]), input split length: 1864


 41%|████▏     | 5085/12315 [37:17<1:29:00,  1.35it/s]

inputs.shape: torch.Size([1, 1313]), input split length: 897


 41%|████▏     | 5101/12315 [37:17<1:04:51,  1.85it/s]

inputs.shape: torch.Size([1, 2713]), input split length: 1147


 42%|████▏     | 5145/12315 [37:19<32:10,  3.71it/s]  

Validation index 5155 skipped because input.shape: torch.Size([1, 7043]), input split length: 3494
inputs.shape: torch.Size([1, 3181]), input split length: 1983


 42%|████▏     | 5191/12315 [37:33<34:00,  3.49it/s]

inputs.shape: torch.Size([1, 4828]), input split length: 2702


 42%|████▏     | 5204/12315 [37:48<49:40,  2.39it/s]

inputs.shape: torch.Size([1, 3509]), input split length: 2186


 42%|████▏     | 5215/12315 [37:50<45:21,  2.61it/s]

inputs.shape: torch.Size([1, 2580]), input split length: 1661


 43%|████▎     | 5260/12315 [38:04<41:02,  2.87it/s]

inputs.shape: torch.Size([1, 4024]), input split length: 2427


 43%|████▎     | 5280/12315 [38:19<51:16,  2.29it/s]

Validation index 5293 skipped because input.shape: torch.Size([1, 9972]), input split length: 4613
inputs.shape: torch.Size([1, 1174]), input split length: 716


 43%|████▎     | 5310/12315 [38:20<35:11,  3.32it/s]

inputs.shape: torch.Size([1, 4592]), input split length: 2479


 43%|████▎     | 5312/12315 [38:35<1:00:49,  1.92it/s]

inputs.shape: torch.Size([1, 1071]), input split length: 562


 43%|████▎     | 5318/12315 [38:35<55:25,  2.10it/s]  

inputs.shape: torch.Size([1, 3634]), input split length: 2401


 43%|████▎     | 5322/12315 [38:37<55:30,  2.10it/s]

inputs.shape: torch.Size([1, 4733]), input split length: 1831


 43%|████▎     | 5328/12315 [38:52<1:37:12,  1.20it/s]

Validation index 5343 skipped because input.shape: torch.Size([1, 7380]), input split length: 3785
inputs.shape: torch.Size([1, 2947]), input split length: 1713


 43%|████▎     | 5347/12315 [39:07<1:32:20,  1.26it/s]

Validation index 5361 skipped because input.shape: torch.Size([1, 6606]), input split length: 3065
inputs.shape: torch.Size([1, 254]), input split length: 157


 44%|████▍     | 5426/12315 [39:07<25:55,  4.43it/s]  

inputs.shape: torch.Size([1, 3309]), input split length: 914


 44%|████▍     | 5438/12315 [39:22<40:45,  2.81it/s]

inputs.shape: torch.Size([1, 429]), input split length: 293


 44%|████▍     | 5446/12315 [39:22<36:58,  3.10it/s]

inputs.shape: torch.Size([1, 2560]), input split length: 1305


 44%|████▍     | 5448/12315 [39:36<1:06:32,  1.72it/s]

inputs.shape: torch.Size([1, 3958]), input split length: 2020


 44%|████▍     | 5449/12315 [39:50<1:50:16,  1.04it/s]

inputs.shape: torch.Size([1, 539]), input split length: 369


 45%|████▍     | 5506/12315 [39:51<34:44,  3.27it/s]  

inputs.shape: torch.Size([1, 615]), input split length: 426


 45%|████▍     | 5516/12315 [39:52<30:33,  3.71it/s]

inputs.shape: torch.Size([1, 1391]), input split length: 783


 45%|████▍     | 5533/12315 [40:05<45:51,  2.46it/s]

Validation index 5541 skipped because input.shape: torch.Size([1, 6456]), input split length: 3395
inputs.shape: torch.Size([1, 4200]), input split length: 2493


 45%|████▌     | 5550/12315 [40:20<59:54,  1.88it/s]

inputs.shape: torch.Size([1, 5093]), input split length: 1767


 45%|████▌     | 5594/12315 [40:35<41:27,  2.70it/s]  

Validation index 5581 skipped because input.shape: torch.Size([1, 13113]), input split length: 5620
Validation index 5593 skipped because input.shape: torch.Size([1, 6429]), input split length: 2838
inputs.shape: torch.Size([1, 2753]), input split length: 1592


 46%|████▌     | 5619/12315 [40:49<48:06,  2.32it/s]

inputs.shape: torch.Size([1, 572]), input split length: 383


 46%|████▌     | 5621/12315 [40:50<47:32,  2.35it/s]

inputs.shape: torch.Size([1, 3118]), input split length: 1792


 46%|████▌     | 5669/12315 [40:51<23:10,  4.78it/s]

inputs.shape: torch.Size([1, 4911]), input split length: 2751


 46%|████▌     | 5695/12315 [41:07<35:40,  3.09it/s]

Validation index 5707 skipped because input.shape: torch.Size([1, 6082]), input split length: 2758
inputs.shape: torch.Size([1, 4831]), input split length: 2254


 46%|████▋     | 5717/12315 [41:22<46:13,  2.38it/s]

inputs.shape: torch.Size([1, 659]), input split length: 456


 47%|████▋     | 5747/12315 [41:23<31:07,  3.52it/s]

inputs.shape: torch.Size([1, 1802]), input split length: 795


 47%|████▋     | 5752/12315 [41:36<51:00,  2.14it/s]

inputs.shape: torch.Size([1, 2428]), input split length: 1456


 47%|████▋     | 5765/12315 [41:47<1:00:03,  1.82it/s]

inputs.shape: torch.Size([1, 1801]), input split length: 995


 47%|████▋     | 5768/12315 [42:01<1:28:42,  1.23it/s]

inputs.shape: torch.Size([1, 1937]), input split length: 1138


 47%|████▋     | 5769/12315 [42:14<2:11:47,  1.21s/it]

inputs.shape: torch.Size([1, 2492]), input split length: 1490


 47%|████▋     | 5779/12315 [42:28<2:16:59,  1.26s/it]

inputs.shape: torch.Size([1, 702]), input split length: 525


 47%|████▋     | 5807/12315 [42:28<1:02:40,  1.73it/s]

inputs.shape: torch.Size([1, 415]), input split length: 205
inputs.shape: torch.Size([1, 2248]), input split length: 1362


 47%|████▋     | 5812/12315 [42:43<1:32:45,  1.17it/s]

inputs.shape: torch.Size([1, 1207]), input split length: 636


 47%|████▋     | 5827/12315 [42:43<1:02:32,  1.73it/s]

inputs.shape: torch.Size([1, 5113]), input split length: 2694


 48%|████▊     | 5871/12315 [42:59<47:38,  2.25it/s]  

inputs.shape: torch.Size([1, 2065]), input split length: 1071


 48%|████▊     | 5893/12315 [43:00<35:02,  3.06it/s]

inputs.shape: torch.Size([1, 444]), input split length: 288


 48%|████▊     | 5900/12315 [43:01<31:51,  3.36it/s]

inputs.shape: torch.Size([1, 1672]), input split length: 651


 48%|████▊     | 5904/12315 [43:02<31:18,  3.41it/s]

inputs.shape: torch.Size([1, 2731]), input split length: 1616


 48%|████▊     | 5949/12315 [43:13<28:26,  3.73it/s]

inputs.shape: torch.Size([1, 4260]), input split length: 2602


 48%|████▊     | 5951/12315 [43:28<55:31,  1.91it/s]

inputs.shape: torch.Size([1, 3063]), input split length: 1858


 48%|████▊     | 5961/12315 [43:29<48:01,  2.21it/s]

inputs.shape: torch.Size([1, 3795]), input split length: 2176


 49%|████▊     | 5979/12315 [43:32<35:56,  2.94it/s]

inputs.shape: torch.Size([1, 4254]), input split length: 2598


 49%|████▊     | 5990/12315 [43:46<1:00:10,  1.75it/s]

inputs.shape: torch.Size([1, 426]), input split length: 249


 49%|████▉     | 6019/12315 [43:59<54:03,  1.94it/s]  

inputs.shape: torch.Size([1, 3048]), input split length: 1551


 49%|████▉     | 6047/12315 [44:14<53:22,  1.96it/s]

inputs.shape: torch.Size([1, 2217]), input split length: 983


 49%|████▉     | 6054/12315 [44:17<53:04,  1.97it/s]

inputs.shape: torch.Size([1, 2580]), input split length: 1622


 49%|████▉     | 6072/12315 [44:21<42:54,  2.42it/s]

inputs.shape: torch.Size([1, 4063]), input split length: 1992


 49%|████▉     | 6083/12315 [44:23<38:36,  2.69it/s]

inputs.shape: torch.Size([1, 492]), input split length: 247


 50%|████▉     | 6096/12315 [44:24<29:31,  3.51it/s]

inputs.shape: torch.Size([1, 674]), input split length: 411


 50%|████▉     | 6099/12315 [44:24<28:55,  3.58it/s]

inputs.shape: torch.Size([1, 3001]), input split length: 1711


 50%|████▉     | 6101/12315 [44:38<1:18:20,  1.32it/s]

inputs.shape: torch.Size([1, 1868]), input split length: 983


 50%|████▉     | 6107/12315 [44:49<1:41:49,  1.02it/s]

inputs.shape: torch.Size([1, 2886]), input split length: 1831


 50%|████▉     | 6133/12315 [45:03<1:14:06,  1.39it/s]

inputs.shape: torch.Size([1, 2969]), input split length: 1835


 50%|████▉     | 6144/12315 [45:04<58:52,  1.75it/s]  

inputs.shape: torch.Size([1, 2761]), input split length: 1502


 50%|████▉     | 6149/12315 [45:18<1:32:07,  1.12it/s]

Validation index 6150 skipped because input.shape: torch.Size([1, 6883]), input split length: 3028
inputs.shape: torch.Size([1, 2126]), input split length: 1387


 50%|█████     | 6177/12315 [45:32<1:09:10,  1.48it/s]

inputs.shape: torch.Size([1, 2997]), input split length: 1177


 50%|█████     | 6182/12315 [45:33<1:04:39,  1.58it/s]

inputs.shape: torch.Size([1, 4135]), input split length: 2401


 50%|█████     | 6200/12315 [45:48<1:11:42,  1.42it/s]

inputs.shape: torch.Size([1, 563]), input split length: 385


 50%|█████     | 6201/12315 [46:01<1:48:42,  1.07s/it]

inputs.shape: torch.Size([1, 4298]), input split length: 2440


 51%|█████     | 6239/12315 [46:16<1:05:56,  1.54it/s]

inputs.shape: torch.Size([1, 93]), input split length: 50


 51%|█████     | 6244/12315 [46:29<1:26:16,  1.17it/s]

inputs.shape: torch.Size([1, 522]), input split length: 247


 51%|█████     | 6245/12315 [46:30<1:25:29,  1.18it/s]

inputs.shape: torch.Size([1, 3830]), input split length: 2025


 51%|█████     | 6257/12315 [46:44<1:37:26,  1.04it/s]

Validation index 6257 skipped because input.shape: torch.Size([1, 5827]), input split length: 2708
inputs.shape: torch.Size([1, 264]), input split length: 163


 51%|█████     | 6276/12315 [46:45<56:50,  1.77it/s]  

Validation index 6276 skipped because input.shape: torch.Size([1, 6931]), input split length: 2976
inputs.shape: torch.Size([1, 1985]), input split length: 1210


 51%|█████     | 6281/12315 [46:58<1:26:35,  1.16it/s]

inputs.shape: torch.Size([1, 470]), input split length: 296


 51%|█████     | 6288/12315 [46:59<1:09:30,  1.45it/s]

inputs.shape: torch.Size([1, 612]), input split length: 318


 51%|█████▏    | 6313/12315 [46:59<34:05,  2.93it/s]  

inputs.shape: torch.Size([1, 87]), input split length: 41


 51%|█████▏    | 6331/12315 [47:12<47:04,  2.12it/s]

inputs.shape: torch.Size([1, 345]), input split length: 187


 51%|█████▏    | 6332/12315 [47:13<47:16,  2.11it/s]

inputs.shape: torch.Size([1, 2252]), input split length: 1016


 52%|█████▏    | 6348/12315 [47:14<31:58,  3.11it/s]

inputs.shape: torch.Size([1, 4646]), input split length: 2271


 52%|█████▏    | 6359/12315 [47:29<1:00:25,  1.64it/s]

inputs.shape: torch.Size([1, 2175]), input split length: 1272


 52%|█████▏    | 6360/12315 [47:30<1:02:22,  1.59it/s]

inputs.shape: torch.Size([1, 2870]), input split length: 1262


 52%|█████▏    | 6389/12315 [47:32<28:37,  3.45it/s]  

inputs.shape: torch.Size([1, 288]), input split length: 177


 52%|█████▏    | 6410/12315 [47:32<18:41,  5.26it/s]

inputs.shape: torch.Size([1, 3299]), input split length: 1623


 52%|█████▏    | 6415/12315 [47:47<48:36,  2.02it/s]

inputs.shape: torch.Size([1, 2226]), input split length: 1385


 52%|█████▏    | 6419/12315 [48:00<1:21:28,  1.21it/s]

inputs.shape: torch.Size([1, 782]), input split length: 499


 52%|█████▏    | 6426/12315 [48:01<1:05:17,  1.50it/s]

inputs.shape: torch.Size([1, 2367]), input split length: 1319


 52%|█████▏    | 6433/12315 [48:07<1:09:57,  1.40it/s]

inputs.shape: torch.Size([1, 3135]), input split length: 1783


 52%|█████▏    | 6446/12315 [48:09<47:24,  2.06it/s]  

Validation index 6459 skipped because input.shape: torch.Size([1, 9337]), input split length: 4159
inputs.shape: torch.Size([1, 744]), input split length: 495


 53%|█████▎    | 6477/12315 [48:09<21:30,  4.52it/s]

inputs.shape: torch.Size([1, 5005]), input split length: 1676


 53%|█████▎    | 6516/12315 [48:25<29:48,  3.24it/s]

inputs.shape: torch.Size([1, 4332]), input split length: 2037


 53%|█████▎    | 6517/12315 [48:39<55:13,  1.75it/s]

inputs.shape: torch.Size([1, 4673]), input split length: 2106


 53%|█████▎    | 6531/12315 [48:54<1:08:03,  1.42it/s]

inputs.shape: torch.Size([1, 809]), input split length: 508


 53%|█████▎    | 6534/12315 [48:55<1:04:31,  1.49it/s]

inputs.shape: torch.Size([1, 3405]), input split length: 1799


 53%|█████▎    | 6549/12315 [49:09<1:13:50,  1.30it/s]

inputs.shape: torch.Size([1, 1496]), input split length: 907


 53%|█████▎    | 6560/12315 [49:23<1:25:34,  1.12it/s]

inputs.shape: torch.Size([1, 3590]), input split length: 1777


 53%|█████▎    | 6567/12315 [49:37<1:47:20,  1.12s/it]

inputs.shape: torch.Size([1, 4348]), input split length: 2291


 54%|█████▎    | 6598/12315 [49:52<1:12:45,  1.31it/s]

inputs.shape: torch.Size([1, 1281]), input split length: 654


 54%|█████▎    | 6599/12315 [49:53<1:12:58,  1.31it/s]

inputs.shape: torch.Size([1, 2657]), input split length: 1492


 54%|█████▍    | 6636/12315 [50:07<51:27,  1.84it/s]  

inputs.shape: torch.Size([1, 3897]), input split length: 2155


 54%|█████▍    | 6638/12315 [50:09<53:56,  1.75it/s]

inputs.shape: torch.Size([1, 3854]), input split length: 1841


 54%|█████▍    | 6649/12315 [50:24<1:11:29,  1.32it/s]

inputs.shape: torch.Size([1, 728]), input split length: 430


 54%|█████▍    | 6681/12315 [50:37<54:34,  1.72it/s]  

inputs.shape: torch.Size([1, 179]), input split length: 115


 54%|█████▍    | 6684/12315 [50:37<52:01,  1.80it/s]

inputs.shape: torch.Size([1, 2039]), input split length: 1087


 55%|█████▍    | 6759/12315 [50:38<16:38,  5.57it/s]

inputs.shape: torch.Size([1, 2933]), input split length: 1892


 55%|█████▌    | 6778/12315 [50:52<26:54,  3.43it/s]

inputs.shape: torch.Size([1, 3894]), input split length: 2525


 55%|█████▌    | 6814/12315 [51:07<30:19,  3.02it/s]

Validation index 6833 skipped because input.shape: torch.Size([1, 7260]), input split length: 2655
Validation index 6850 skipped because input.shape: torch.Size([1, 6696]), input split length: 3019
inputs.shape: torch.Size([1, 4434]), input split length: 2226


 56%|█████▋    | 6943/12315 [51:22<17:16,  5.18it/s]

inputs.shape: torch.Size([1, 511]), input split length: 268


 56%|█████▋    | 6950/12315 [51:35<24:18,  3.68it/s]

inputs.shape: torch.Size([1, 2290]), input split length: 1326


 57%|█████▋    | 6967/12315 [51:43<26:33,  3.36it/s]

inputs.shape: torch.Size([1, 4393]), input split length: 1897


 57%|█████▋    | 6969/12315 [51:58<41:19,  2.16it/s]

inputs.shape: torch.Size([1, 975]), input split length: 620


 57%|█████▋    | 6991/12315 [51:58<30:56,  2.87it/s]

inputs.shape: torch.Size([1, 2950]), input split length: 1019


 57%|█████▋    | 7004/12315 [52:00<27:06,  3.26it/s]

inputs.shape: torch.Size([1, 5209]), input split length: 2360


 57%|█████▋    | 7009/12315 [52:16<50:06,  1.76it/s]

inputs.shape: torch.Size([1, 4363]), input split length: 2080


 57%|█████▋    | 7036/12315 [52:30<49:16,  1.79it/s]

inputs.shape: torch.Size([1, 1308]), input split length: 664


 57%|█████▋    | 7059/12315 [52:44<49:41,  1.76it/s]

inputs.shape: torch.Size([1, 5430]), input split length: 2687


 57%|█████▋    | 7073/12315 [53:00<1:00:38,  1.44it/s]

inputs.shape: torch.Size([1, 5173]), input split length: 3022


 58%|█████▊    | 7098/12315 [53:15<58:00,  1.50it/s]  

Validation index 7120 skipped because input.shape: torch.Size([1, 6186]), input split length: 2705
inputs.shape: torch.Size([1, 294]), input split length: 185


 58%|█████▊    | 7164/12315 [53:28<33:58,  2.53it/s]

Validation index 7191 skipped because input.shape: torch.Size([1, 5781]), input split length: 2926
inputs.shape: torch.Size([1, 3380]), input split length: 1609


 58%|█████▊    | 7196/12315 [53:42<34:56,  2.44it/s]

inputs.shape: torch.Size([1, 740]), input split length: 510


 58%|█████▊    | 7202/12315 [53:43<33:05,  2.58it/s]

inputs.shape: torch.Size([1, 2843]), input split length: 1478


 58%|█████▊    | 7204/12315 [53:55<48:17,  1.76it/s]

inputs.shape: torch.Size([1, 4575]), input split length: 2220


 59%|█████▊    | 7218/12315 [54:10<58:45,  1.45it/s]

inputs.shape: torch.Size([1, 3910]), input split length: 2037


 59%|█████▉    | 7295/12315 [54:12<17:03,  4.91it/s]

Validation index 7242 skipped because input.shape: torch.Size([1, 8095]), input split length: 3809
Validation index 7294 skipped because input.shape: torch.Size([1, 15644]), input split length: 7524
inputs.shape: torch.Size([1, 4262]), input split length: 2226


 59%|█████▉    | 7311/12315 [54:27<28:03,  2.97it/s]

inputs.shape: torch.Size([1, 5349]), input split length: 2740


 59%|█████▉    | 7321/12315 [54:43<42:22,  1.96it/s]

inputs.shape: torch.Size([1, 4047]), input split length: 2231


 60%|█████▉    | 7366/12315 [54:50<27:56,  2.95it/s]

inputs.shape: torch.Size([1, 1755]), input split length: 1155


 60%|██████    | 7407/12315 [54:51<17:54,  4.57it/s]

inputs.shape: torch.Size([1, 1122]), input split length: 772


 60%|██████    | 7419/12315 [54:51<16:07,  5.06it/s]

inputs.shape: torch.Size([1, 3485]), input split length: 1831


 60%|██████    | 7443/12315 [54:53<13:29,  6.02it/s]

inputs.shape: torch.Size([1, 162]), input split length: 102


 60%|██████    | 7450/12315 [54:54<12:52,  6.30it/s]

inputs.shape: torch.Size([1, 1333]), input split length: 993


 61%|██████    | 7465/12315 [55:07<27:06,  2.98it/s]

inputs.shape: torch.Size([1, 1052]), input split length: 646


 61%|██████    | 7477/12315 [55:08<22:15,  3.62it/s]

Validation index 7491 skipped because input.shape: torch.Size([1, 6977]), input split length: 3487
inputs.shape: torch.Size([1, 4074]), input split length: 1870


 61%|██████    | 7503/12315 [55:13<18:52,  4.25it/s]

inputs.shape: torch.Size([1, 5393]), input split length: 3118


 61%|██████    | 7508/12315 [55:28<42:09,  1.90it/s]

inputs.shape: torch.Size([1, 3999]), input split length: 2177


 61%|██████    | 7522/12315 [55:33<38:06,  2.10it/s]

inputs.shape: torch.Size([1, 2308]), input split length: 1246


 61%|██████    | 7532/12315 [55:47<54:05,  1.47it/s]

inputs.shape: torch.Size([1, 3070]), input split length: 2027


 61%|██████    | 7536/12315 [55:49<51:28,  1.55it/s]

inputs.shape: torch.Size([1, 805]), input split length: 479


 61%|██████    | 7542/12315 [55:49<43:03,  1.85it/s]

inputs.shape: torch.Size([1, 3553]), input split length: 2086


 61%|██████▏   | 7544/12315 [55:51<46:26,  1.71it/s]

inputs.shape: torch.Size([1, 686]), input split length: 371


 61%|██████▏   | 7552/12315 [55:52<33:14,  2.39it/s]

inputs.shape: torch.Size([1, 290]), input split length: 151


 62%|██████▏   | 7580/12315 [55:53<13:27,  5.87it/s]

inputs.shape: torch.Size([1, 4728]), input split length: 2408


 62%|██████▏   | 7605/12315 [56:08<28:22,  2.77it/s]

inputs.shape: torch.Size([1, 4771]), input split length: 2628


 62%|██████▏   | 7620/12315 [56:23<42:27,  1.84it/s]

Validation index 7620 skipped because input.shape: torch.Size([1, 9158]), input split length: 2938
inputs.shape: torch.Size([1, 3677]), input split length: 2021


 62%|██████▏   | 7622/12315 [56:37<1:08:20,  1.14it/s]

inputs.shape: torch.Size([1, 4537]), input split length: 2422


 62%|██████▏   | 7644/12315 [56:52<1:01:23,  1.27it/s]

Validation index 7656 skipped because input.shape: torch.Size([1, 6541]), input split length: 2093
inputs.shape: torch.Size([1, 5100]), input split length: 3024


 62%|██████▏   | 7659/12315 [57:08<1:06:42,  1.16it/s]

inputs.shape: torch.Size([1, 5034]), input split length: 1806


 62%|██████▏   | 7681/12315 [57:23<1:01:35,  1.25it/s]

inputs.shape: torch.Size([1, 2988]), input split length: 1727


 62%|██████▏   | 7689/12315 [57:25<53:44,  1.43it/s]  

inputs.shape: torch.Size([1, 4540]), input split length: 2589


 62%|██████▏   | 7693/12315 [57:40<1:18:29,  1.02s/it]

inputs.shape: torch.Size([1, 1994]), input split length: 981


 63%|██████▎   | 7706/12315 [57:41<54:32,  1.41it/s]  

inputs.shape: torch.Size([1, 330]), input split length: 213
inputs.shape: torch.Size([1, 274]), input split length: 176


 63%|██████▎   | 7708/12315 [57:42<53:36,  1.43it/s]

Validation index 7729 skipped because input.shape: torch.Size([1, 5502]), input split length: 2582
inputs.shape: torch.Size([1, 1869]), input split length: 1036


 63%|██████▎   | 7737/12315 [57:45<27:50,  2.74it/s]

inputs.shape: torch.Size([1, 5169]), input split length: 2331


 63%|██████▎   | 7743/12315 [58:01<52:42,  1.45it/s]

inputs.shape: torch.Size([1, 2396]), input split length: 1422


 63%|██████▎   | 7747/12315 [58:02<49:07,  1.55it/s]

inputs.shape: torch.Size([1, 566]), input split length: 343


 63%|██████▎   | 7756/12315 [58:03<36:32,  2.08it/s]

inputs.shape: torch.Size([1, 348]), input split length: 205


 63%|██████▎   | 7760/12315 [58:16<1:08:14,  1.11it/s]

Validation index 7772 skipped because input.shape: torch.Size([1, 9340]), input split length: 4453
inputs.shape: torch.Size([1, 3965]), input split length: 2076


 63%|██████▎   | 7812/12315 [58:30<33:18,  2.25it/s]  

inputs.shape: torch.Size([1, 2457]), input split length: 1402


 63%|██████▎   | 7817/12315 [58:32<32:01,  2.34it/s]

Validation index 7844 skipped because input.shape: torch.Size([1, 9217]), input split length: 5424
inputs.shape: torch.Size([1, 357]), input split length: 241


 64%|██████▍   | 7854/12315 [58:37<21:39,  3.43it/s]

inputs.shape: torch.Size([1, 454]), input split length: 316


 64%|██████▍   | 7855/12315 [58:38<22:03,  3.37it/s]

inputs.shape: torch.Size([1, 3440]), input split length: 1990


 64%|██████▍   | 7856/12315 [58:52<48:56,  1.52it/s]

inputs.shape: torch.Size([1, 1729]), input split length: 959


 64%|██████▍   | 7866/12315 [58:53<37:37,  1.97it/s]

inputs.shape: torch.Size([1, 567]), input split length: 358


 64%|██████▍   | 7877/12315 [58:54<27:30,  2.69it/s]

inputs.shape: torch.Size([1, 2647]), input split length: 1505


 64%|██████▍   | 7880/12315 [58:55<28:15,  2.62it/s]

inputs.shape: torch.Size([1, 839]), input split length: 571


 64%|██████▍   | 7884/12315 [58:56<25:38,  2.88it/s]

inputs.shape: torch.Size([1, 3698]), input split length: 2058


 64%|██████▍   | 7905/12315 [58:58<15:33,  4.72it/s]

inputs.shape: torch.Size([1, 2483]), input split length: 1366


 64%|██████▍   | 7910/12315 [58:59<16:09,  4.54it/s]

inputs.shape: torch.Size([1, 2046]), input split length: 1027


 64%|██████▍   | 7940/12315 [59:13<25:53,  2.82it/s]

inputs.shape: torch.Size([1, 535]), input split length: 346


 65%|██████▍   | 7953/12315 [59:13<19:53,  3.65it/s]

inputs.shape: torch.Size([1, 1445]), input split length: 725


 65%|██████▍   | 7955/12315 [59:27<45:25,  1.60it/s]

inputs.shape: torch.Size([1, 1541]), input split length: 1022


 65%|██████▍   | 7957/12315 [59:28<44:35,  1.63it/s]

Validation index 7958 skipped because input.shape: torch.Size([1, 5618]), input split length: 2272
inputs.shape: torch.Size([1, 1272]), input split length: 952


 65%|██████▍   | 7970/12315 [59:29<28:24,  2.55it/s]

inputs.shape: torch.Size([1, 1004]), input split length: 694


 65%|██████▍   | 7994/12315 [59:29<14:51,  4.85it/s]

inputs.shape: torch.Size([1, 991]), input split length: 519


 65%|██████▍   | 7998/12315 [59:30<14:39,  4.91it/s]

inputs.shape: torch.Size([1, 4365]), input split length: 2488


 65%|██████▌   | 8007/12315 [59:45<41:31,  1.73it/s]

Validation index 8007 skipped because input.shape: torch.Size([1, 8896]), input split length: 3953
inputs.shape: torch.Size([1, 3383]), input split length: 1995


 65%|██████▌   | 8020/12315 [59:59<54:24,  1.32it/s]

Validation index 8100 skipped because input.shape: torch.Size([1, 6094]), input split length: 3088
inputs.shape: torch.Size([1, 4905]), input split length: 2550


 66%|██████▌   | 8105/12315 [1:00:14<22:08,  3.17it/s]

inputs.shape: torch.Size([1, 3775]), input split length: 2510


 66%|██████▌   | 8142/12315 [1:00:17<16:13,  4.29it/s]

Validation index 8144 skipped because input.shape: torch.Size([1, 6959]), input split length: 2993
inputs.shape: torch.Size([1, 3339]), input split length: 2092


 66%|██████▌   | 8151/12315 [1:00:19<16:08,  4.30it/s]

inputs.shape: torch.Size([1, 5399]), input split length: 2669


 66%|██████▌   | 8152/12315 [1:00:35<33:07,  2.09it/s]

inputs.shape: torch.Size([1, 587]), input split length: 356


 66%|██████▋   | 8168/12315 [1:00:35<24:56,  2.77it/s]

inputs.shape: torch.Size([1, 1118]), input split length: 530


 67%|██████▋   | 8196/12315 [1:00:36<15:38,  4.39it/s]

inputs.shape: torch.Size([1, 581]), input split length: 346


 67%|██████▋   | 8232/12315 [1:00:49<19:27,  3.50it/s]

inputs.shape: torch.Size([1, 3435]), input split length: 1578


 67%|██████▋   | 8235/12315 [1:01:03<34:00,  2.00it/s]

inputs.shape: torch.Size([1, 2281]), input split length: 867


 67%|██████▋   | 8236/12315 [1:01:05<35:06,  1.94it/s]

Validation index 8239 skipped because input.shape: torch.Size([1, 6073]), input split length: 3008
inputs.shape: torch.Size([1, 3968]), input split length: 1807


 67%|██████▋   | 8241/12315 [1:01:10<40:01,  1.70it/s]

inputs.shape: torch.Size([1, 3366]), input split length: 1870
inputs.shape: torch.Size([1, 744]), input split length: 466


 67%|██████▋   | 8243/12315 [1:01:12<43:35,  1.56it/s]

inputs.shape: torch.Size([1, 3788]), input split length: 2122


 67%|██████▋   | 8256/12315 [1:01:27<56:17,  1.20it/s]

inputs.shape: torch.Size([1, 2375]), input split length: 1255


 67%|██████▋   | 8258/12315 [1:01:28<55:10,  1.23it/s]

inputs.shape: torch.Size([1, 903]), input split length: 602


 67%|██████▋   | 8291/12315 [1:01:29<19:48,  3.39it/s]

inputs.shape: torch.Size([1, 3530]), input split length: 1932


 67%|██████▋   | 8307/12315 [1:01:33<19:14,  3.47it/s]

inputs.shape: torch.Size([1, 767]), input split length: 445


 68%|██████▊   | 8379/12315 [1:01:34<06:40,  9.82it/s]

inputs.shape: torch.Size([1, 994]), input split length: 703


 69%|██████▊   | 8447/12315 [1:01:35<03:47, 17.04it/s]

inputs.shape: torch.Size([1, 2270]), input split length: 1122


 69%|██████▊   | 8455/12315 [1:01:38<05:19, 12.09it/s]

inputs.shape: torch.Size([1, 4795]), input split length: 1980


 69%|██████▉   | 8488/12315 [1:01:53<12:33,  5.08it/s]

inputs.shape: torch.Size([1, 5032]), input split length: 2409


 69%|██████▉   | 8490/12315 [1:02:08<24:36,  2.59it/s]

inputs.shape: torch.Size([1, 2160]), input split length: 1035


 69%|██████▉   | 8510/12315 [1:02:09<18:54,  3.35it/s]

inputs.shape: torch.Size([1, 4335]), input split length: 2471


 69%|██████▉   | 8540/12315 [1:02:24<23:15,  2.70it/s]

inputs.shape: torch.Size([1, 1959]), input split length: 1175


 69%|██████▉   | 8542/12315 [1:02:38<36:51,  1.71it/s]

inputs.shape: torch.Size([1, 1940]), input split length: 1126


 69%|██████▉   | 8544/12315 [1:02:39<36:43,  1.71it/s]

inputs.shape: torch.Size([1, 1986]), input split length: 879


 69%|██████▉   | 8547/12315 [1:02:40<35:27,  1.77it/s]

inputs.shape: torch.Size([1, 590]), input split length: 390


 69%|██████▉   | 8550/12315 [1:02:41<32:48,  1.91it/s]

inputs.shape: torch.Size([1, 4047]), input split length: 2164


 70%|██████▉   | 8581/12315 [1:02:43<15:14,  4.08it/s]

inputs.shape: torch.Size([1, 4066]), input split length: 1842


 70%|██████▉   | 8618/12315 [1:02:58<19:35,  3.15it/s]

Validation index 8617 skipped because input.shape: torch.Size([1, 35065]), input split length: 10210
inputs.shape: torch.Size([1, 2397]), input split length: 1263


 70%|███████   | 8627/12315 [1:03:12<34:26,  1.78it/s]

inputs.shape: torch.Size([1, 3275]), input split length: 1858


 70%|███████   | 8641/12315 [1:03:14<26:25,  2.32it/s]

inputs.shape: torch.Size([1, 2381]), input split length: 1043


 71%|███████   | 8695/12315 [1:03:15<10:34,  5.71it/s]

inputs.shape: torch.Size([1, 567]), input split length: 345


 71%|███████   | 8703/12315 [1:03:28<21:08,  2.85it/s]

inputs.shape: torch.Size([1, 1471]), input split length: 829


 71%|███████   | 8736/12315 [1:03:42<22:25,  2.66it/s]

inputs.shape: torch.Size([1, 3195]), input split length: 1953


 71%|███████   | 8753/12315 [1:03:48<22:23,  2.65it/s]

inputs.shape: torch.Size([1, 2042]), input split length: 1359


 71%|███████   | 8756/12315 [1:04:02<35:52,  1.65it/s]

inputs.shape: torch.Size([1, 2702]), input split length: 1667


 71%|███████   | 8770/12315 [1:04:16<42:00,  1.41it/s]

inputs.shape: torch.Size([1, 598]), input split length: 400


 71%|███████▏  | 8790/12315 [1:04:16<27:35,  2.13it/s]

inputs.shape: torch.Size([1, 1381]), input split length: 721


 72%|███████▏  | 8811/12315 [1:04:30<30:53,  1.89it/s]

Validation index 8822 skipped because input.shape: torch.Size([1, 6770]), input split length: 3411
inputs.shape: torch.Size([1, 874]), input split length: 552


 72%|███████▏  | 8836/12315 [1:04:31<19:55,  2.91it/s]

inputs.shape: torch.Size([1, 565]), input split length: 358


 72%|███████▏  | 8853/12315 [1:04:31<15:05,  3.82it/s]

inputs.shape: torch.Size([1, 3464]), input split length: 1849


 72%|███████▏  | 8861/12315 [1:04:46<28:21,  2.03it/s]

inputs.shape: torch.Size([1, 644]), input split length: 436


 72%|███████▏  | 8880/12315 [1:04:46<19:18,  2.97it/s]

inputs.shape: torch.Size([1, 99]), input split length: 45


 72%|███████▏  | 8890/12315 [1:04:59<30:24,  1.88it/s]

inputs.shape: torch.Size([1, 956]), input split length: 527


 72%|███████▏  | 8896/12315 [1:05:13<44:37,  1.28it/s]

inputs.shape: torch.Size([1, 4465]), input split length: 2395


 72%|███████▏  | 8917/12315 [1:05:28<42:45,  1.32it/s]

inputs.shape: torch.Size([1, 384]), input split length: 260


 73%|███████▎  | 8956/12315 [1:05:41<29:51,  1.87it/s]

inputs.shape: torch.Size([1, 2349]), input split length: 923


 73%|███████▎  | 8965/12315 [1:05:42<26:31,  2.11it/s]

inputs.shape: torch.Size([1, 977]), input split length: 629


 73%|███████▎  | 8976/12315 [1:05:55<34:43,  1.60it/s]

Validation index 8995 skipped because input.shape: torch.Size([1, 6483]), input split length: 3473
inputs.shape: torch.Size([1, 3553]), input split length: 2033


 73%|███████▎  | 9004/12315 [1:06:05<27:39,  2.00it/s]

inputs.shape: torch.Size([1, 3134]), input split length: 1757


 73%|███████▎  | 9007/12315 [1:06:19<41:58,  1.31it/s]

inputs.shape: torch.Size([1, 353]), input split length: 140


 73%|███████▎  | 9023/12315 [1:06:19<29:01,  1.89it/s]

inputs.shape: torch.Size([1, 3349]), input split length: 1674


 73%|███████▎  | 9043/12315 [1:06:34<32:34,  1.67it/s]

inputs.shape: torch.Size([1, 3737]), input split length: 2389


 74%|███████▎  | 9060/12315 [1:06:38<26:53,  2.02it/s]

inputs.shape: torch.Size([1, 1175]), input split length: 493


 74%|███████▎  | 9068/12315 [1:06:39<23:09,  2.34it/s]

inputs.shape: torch.Size([1, 642]), input split length: 398


 74%|███████▍  | 9091/12315 [1:06:40<14:08,  3.80it/s]

inputs.shape: torch.Size([1, 5076]), input split length: 2408


 74%|███████▍  | 9094/12315 [1:06:55<32:47,  1.64it/s]

inputs.shape: torch.Size([1, 4361]), input split length: 2007


 74%|███████▍  | 9103/12315 [1:07:10<45:50,  1.17it/s]

inputs.shape: torch.Size([1, 4371]), input split length: 2436


 74%|███████▍  | 9106/12315 [1:07:25<1:07:51,  1.27s/it]

inputs.shape: torch.Size([1, 3510]), input split length: 1724


 74%|███████▍  | 9141/12315 [1:07:27<27:03,  1.96it/s]  

inputs.shape: torch.Size([1, 4888]), input split length: 2121


 74%|███████▍  | 9165/12315 [1:07:42<29:19,  1.79it/s]

inputs.shape: torch.Size([1, 2194]), input split length: 1361


 75%|███████▍  | 9191/12315 [1:07:56<28:28,  1.83it/s]

inputs.shape: torch.Size([1, 4131]), input split length: 2214


 75%|███████▍  | 9207/12315 [1:08:11<33:06,  1.56it/s]

inputs.shape: torch.Size([1, 5262]), input split length: 2057


 75%|███████▍  | 9224/12315 [1:08:26<36:52,  1.40it/s]

inputs.shape: torch.Size([1, 1645]), input split length: 967


 75%|███████▌  | 9239/12315 [1:08:40<39:12,  1.31it/s]

inputs.shape: torch.Size([1, 417]), input split length: 241


 75%|███████▌  | 9275/12315 [1:08:41<21:03,  2.41it/s]

inputs.shape: torch.Size([1, 790]), input split length: 577


 75%|███████▌  | 9291/12315 [1:08:54<25:39,  1.96it/s]

inputs.shape: torch.Size([1, 5005]), input split length: 2928


 76%|███████▌  | 9334/12315 [1:09:09<21:48,  2.28it/s]

inputs.shape: torch.Size([1, 4376]), input split length: 2342


 76%|███████▌  | 9347/12315 [1:09:24<27:31,  1.80it/s]

inputs.shape: torch.Size([1, 366]), input split length: 227


 76%|███████▌  | 9351/12315 [1:09:37<36:36,  1.35it/s]

inputs.shape: torch.Size([1, 761]), input split length: 377


 76%|███████▌  | 9356/12315 [1:09:38<33:16,  1.48it/s]

inputs.shape: torch.Size([1, 2178]), input split length: 1235


 76%|███████▌  | 9368/12315 [1:09:51<39:30,  1.24it/s]

inputs.shape: torch.Size([1, 355]), input split length: 251


 76%|███████▌  | 9382/12315 [1:09:52<27:38,  1.77it/s]

inputs.shape: torch.Size([1, 3050]), input split length: 1853


 76%|███████▌  | 9388/12315 [1:10:06<41:31,  1.17it/s]

inputs.shape: torch.Size([1, 540]), input split length: 337


 76%|███████▌  | 9390/12315 [1:10:11<47:49,  1.02it/s]

inputs.shape: torch.Size([1, 2141]), input split length: 1481


 77%|███████▋  | 9445/12315 [1:10:25<21:07,  2.26it/s]

inputs.shape: torch.Size([1, 4304]), input split length: 2625


 77%|███████▋  | 9459/12315 [1:10:40<27:10,  1.75it/s]

inputs.shape: torch.Size([1, 489]), input split length: 330


 77%|███████▋  | 9470/12315 [1:10:40<22:21,  2.12it/s]

inputs.shape: torch.Size([1, 3551]), input split length: 1926


 77%|███████▋  | 9474/12315 [1:10:42<22:33,  2.10it/s]

inputs.shape: torch.Size([1, 2492]), input split length: 1174


 77%|███████▋  | 9494/12315 [1:10:56<23:15,  2.02it/s]

Validation index 9493 skipped because input.shape: torch.Size([1, 12807]), input split length: 5230
inputs.shape: torch.Size([1, 663]), input split length: 429


 77%|███████▋  | 9531/12315 [1:10:57<10:27,  4.43it/s]

inputs.shape: torch.Size([1, 2352]), input split length: 1394


 77%|███████▋  | 9543/12315 [1:11:11<19:10,  2.41it/s]

inputs.shape: torch.Size([1, 1504]), input split length: 626


 78%|███████▊  | 9558/12315 [1:11:12<14:40,  3.13it/s]

inputs.shape: torch.Size([1, 555]), input split length: 364


 78%|███████▊  | 9574/12315 [1:11:12<10:49,  4.22it/s]

inputs.shape: torch.Size([1, 469]), input split length: 274


 78%|███████▊  | 9578/12315 [1:11:13<10:23,  4.39it/s]

Validation index 9580 skipped because input.shape: torch.Size([1, 7409]), input split length: 2485
inputs.shape: torch.Size([1, 555]), input split length: 393


 78%|███████▊  | 9607/12315 [1:11:13<05:40,  7.96it/s]

inputs.shape: torch.Size([1, 3983]), input split length: 1544


 78%|███████▊  | 9630/12315 [1:11:28<13:53,  3.22it/s]

inputs.shape: torch.Size([1, 3319]), input split length: 1252


 79%|███████▊  | 9673/12315 [1:11:30<07:51,  5.60it/s]

Validation index 9682 skipped because input.shape: torch.Size([1, 10211]), input split length: 5040
inputs.shape: torch.Size([1, 693]), input split length: 356


 79%|███████▉  | 9702/12315 [1:11:31<05:37,  7.75it/s]

inputs.shape: torch.Size([1, 4976]), input split length: 2588


 79%|███████▉  | 9710/12315 [1:11:46<14:14,  3.05it/s]

inputs.shape: torch.Size([1, 2084]), input split length: 1025


 79%|███████▉  | 9723/12315 [1:11:47<12:01,  3.59it/s]

inputs.shape: torch.Size([1, 3162]), input split length: 1323


 79%|███████▉  | 9727/12315 [1:11:49<12:28,  3.46it/s]

inputs.shape: torch.Size([1, 1545]), input split length: 739


 79%|███████▉  | 9743/12315 [1:11:50<09:13,  4.65it/s]

inputs.shape: torch.Size([1, 4897]), input split length: 2386


 79%|███████▉  | 9745/12315 [1:12:05<26:28,  1.62it/s]

inputs.shape: torch.Size([1, 4584]), input split length: 2535


 79%|███████▉  | 9748/12315 [1:12:20<45:59,  1.08s/it]

inputs.shape: torch.Size([1, 1411]), input split length: 584


 79%|███████▉  | 9749/12315 [1:12:21<45:38,  1.07s/it]

inputs.shape: torch.Size([1, 633]), input split length: 353


 79%|███████▉  | 9767/12315 [1:12:28<29:29,  1.44it/s]

inputs.shape: torch.Size([1, 3271]), input split length: 1733


 79%|███████▉  | 9774/12315 [1:12:42<42:34,  1.01s/it]

inputs.shape: torch.Size([1, 1038]), input split length: 570


 79%|███████▉  | 9775/12315 [1:12:43<42:02,  1.01it/s]

inputs.shape: torch.Size([1, 430]), input split length: 289


 80%|███████▉  | 9793/12315 [1:12:43<19:56,  2.11it/s]

inputs.shape: torch.Size([1, 611]), input split length: 450


 80%|███████▉  | 9797/12315 [1:12:56<37:02,  1.13it/s]

inputs.shape: torch.Size([1, 3911]), input split length: 2209


 80%|███████▉  | 9806/12315 [1:12:59<28:35,  1.46it/s]

inputs.shape: torch.Size([1, 3607]), input split length: 1611


 80%|███████▉  | 9808/12315 [1:13:13<53:43,  1.29s/it]

Validation index 9820 skipped because input.shape: torch.Size([1, 5953]), input split length: 2529
inputs.shape: torch.Size([1, 3891]), input split length: 1954


 80%|███████▉  | 9835/12315 [1:13:28<33:25,  1.24it/s]

inputs.shape: torch.Size([1, 564]), input split length: 405


 80%|███████▉  | 9846/12315 [1:13:28<24:50,  1.66it/s]

Validation index 9849 skipped because input.shape: torch.Size([1, 7482]), input split length: 3990
inputs.shape: torch.Size([1, 740]), input split length: 426


 80%|████████  | 9852/12315 [1:13:29<21:16,  1.93it/s]

inputs.shape: torch.Size([1, 151]), input split length: 80


 80%|████████  | 9854/12315 [1:13:29<20:22,  2.01it/s]

inputs.shape: torch.Size([1, 373]), input split length: 235


 80%|████████  | 9884/12315 [1:13:30<08:03,  5.02it/s]

inputs.shape: torch.Size([1, 1594]), input split length: 675


 80%|████████  | 9900/12315 [1:13:31<06:11,  6.50it/s]

inputs.shape: torch.Size([1, 2092]), input split length: 1278


 81%|████████  | 9958/12315 [1:13:44<08:00,  4.91it/s]

inputs.shape: torch.Size([1, 4859]), input split length: 2377


 81%|████████  | 9959/12315 [1:14:00<16:53,  2.32it/s]

Validation index 9961 skipped because input.shape: torch.Size([1, 6749]), input split length: 2927
inputs.shape: torch.Size([1, 328]), input split length: 203


 81%|████████  | 9966/12315 [1:14:00<15:03,  2.60it/s]

inputs.shape: torch.Size([1, 3938]), input split length: 2178


 81%|████████  | 9992/12315 [1:14:15<17:39,  2.19it/s]

inputs.shape: torch.Size([1, 2883]), input split length: 1723


 81%|████████  | 10002/12315 [1:14:29<24:17,  1.59it/s]

inputs.shape: torch.Size([1, 2877]), input split length: 1622


 82%|████████▏ | 10042/12315 [1:14:43<18:23,  2.06it/s]

inputs.shape: torch.Size([1, 1176]), input split length: 814


 82%|████████▏ | 10047/12315 [1:14:43<17:18,  2.18it/s]

inputs.shape: torch.Size([1, 401]), input split length: 224


 82%|████████▏ | 10074/12315 [1:14:44<10:41,  3.50it/s]

inputs.shape: torch.Size([1, 1905]), input split length: 889


 82%|████████▏ | 10083/12315 [1:14:45<09:38,  3.86it/s]

inputs.shape: torch.Size([1, 513]), input split length: 369


 82%|████████▏ | 10088/12315 [1:14:58<19:20,  1.92it/s]

inputs.shape: torch.Size([1, 707]), input split length: 429


 82%|████████▏ | 10098/12315 [1:14:59<15:12,  2.43it/s]

inputs.shape: torch.Size([1, 5261]), input split length: 2271


In [None]:
# Display how many entries processed_paper holds (the variable is defined outside this section).
len(processed_paper)

In [None]:
# Collect the second field of every record in idx_skip.
# NOTE(review): presumably this is the length logged when a validation example
# was skipped — confirm against the loop that fills idx_skip.
len_missed = [skipped_record[1] for skipped_record in idx_skip]

# Show the first two values as a quick sanity check.
len_missed[:2]

In [None]:
# Smallest value stored in len_missed.
np.asarray(len_missed).min()

In [None]:
# Largest value stored in len_missed.
np.asarray(len_missed).max()

In [None]:
# Arithmetic mean of the values stored in len_missed.
np.asarray(len_missed).mean()

In [None]:
# NOTE(review): bare name `q` — looks like a leftover debugger "quit" keystroke or a
# deliberate stop for "Run All". It raises NameError unless `q` is defined elsewhere
# in the notebook; confirm the intent and remove this cell if it is not needed.
q

In [None]:

Llama 
===============
```text
len(processed_paper)
1479


len_missed = [x[1] for x in idx_skip]
len_missed[:2]
[3999, 3400]
np.min(len_missed)
1928
np.max(len_missed)
25557
np.mean(len_missed)
3784.1600985221676
```

Mistral 
===============



In [None]:
# NOTE(review): the commented-out snippet below only prints the path of the IPython
# default-profile directory; it looks like leftover scratch code unrelated to the
# evaluation — confirm and remove.
# from IPython.paths import get_ipython_dir
# import os
# print(os.path.join(get_ipython_dir(), 'profile_default'))

# Display the number of collected predictions (the commented line below is the
# equivalent check for the labels).
len(preds)
# len(labels)

In [None]:
# Inspect the first three entries of preds.
preds[:3]

In [None]:
# Inspect the first two entries of labels.
labels[:2]

In [None]:
# Score the predictions against the labels. The mapping returned by
# Metrics.evaluate_property_wise_json_based is extended in place with whatever
# Metrics.evaluate_rouge returns, so `results` ends up holding both sets of scores.
results = Metrics.evaluate_property_wise_json_based(
    label_list=labels,
    prediction_list=preds,
)
rouge_scores = Metrics.evaluate_rouge(label_list=labels, prediction_list=preds)
results.update(rouge_scores)

# Run context: which dataset and checkpoint were evaluated, the partial-match
# threshold imported from evaluation_metrics, and which examples were skipped.
print(f"Test data {script_args.test_dataset}")
print(f"Test ckpt {script_args.test_ckpt}")
print(f"Partial THRESHOLD {THRESHOLD}")
print(f"Total index skipped {len(idx_skip)}")
print(f"Index skipped {idx_skip}")

# Frame the score listing with a banner line above and below.
banner = "##################################################################################"
print(banner)
print("Results:")
for metric_name, metric_value in results.items():
    print(f"{metric_name}: {metric_value}")
print(banner)