# Task: Bert Model을 사용하여 BBC 뉴스 기사의 category를 분류

# Load data & Import packages

In [None]:
%%capture
!pip install transformers

In [None]:
import pandas as pd
import torch
import numpy as np
from transformers import BertTokenizer, BertModel
from torch import nn
from torch.optim import Adam
from tqdm import tqdm

In [None]:
# Load the BBC articles; later cells read the 'category' and 'text' columns.
df = pd.read_csv('bbc-text.csv')

In [None]:
# Preview the first 10 rows.
df.head(10)

Unnamed: 0,category,text
0,tech,tv future in the hands of viewers with home th...
1,business,worldcom boss left books alone former worldc...
2,sport,tigers wary of farrell gamble leicester say ...
3,sport,yeading face newcastle in fa cup premiership s...
4,entertainment,ocean s twelve raids box office ocean s twelve...
5,politics,howard hits back at mongrel jibe michael howar...
6,politics,blair prepares to name poll date tony blair is...
7,sport,henman hopes ended in dubai third seed tim hen...
8,sport,wilkinson fit to face edinburgh england captai...
9,entertainment,last star wars not for children the sixth an...


In [None]:
# Summarize the columns: dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2225 entries, 0 to 2224
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   category  2225 non-null   object
 1   text      2225 non-null   object
dtypes: object(2)
memory usage: 34.9+ KB


In [None]:
# Count the articles in each category to check how balanced the classes are.
df.groupby('category').count()

Unnamed: 0_level_0,text
category,Unnamed: 1_level_1
business,510
entertainment,386
politics,417
sport,511
tech,401


# BertTokenizer

토크나이저로 pretrain된 BERT의 BertTokenizer를 갖고 옵니다.

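- bert-base-uncased : ~110M parameters, English, text is lowercased before tokenization
- bert-large-cased : ~340M parameters, English, case-sensitive (keeps upper and lower case)
- bert-base-cased : ~110M parameters, English, case-sensitive (keeps upper and lower case); the multilingual variant is a separate checkpoint, bert-base-multilingual-cased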


In [None]:
# Tokenizer that matches the 'bert-base-cased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# Category name -> integer class id used as the training target.
labels = dict(
    business=0,
    entertainment=1,
    sport=2,
    tech=3,
    politics=4,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

# Dataset

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenized article text with an integer label.

    Instances are built from the module-level ``labels`` mapping
    (category name -> class id) and the module-level ``tokenizer``.
    """

    def __init__(self, df):
        """Encode every row of ``df`` up front.

        Args:
            df: Table with a 'category' column whose values are keys of
                ``labels`` and a 'text' column holding the raw article text.
        """
        # Translate category names into integer class ids.
        self.labels = [labels[category] for category in df['category']]

        # Each article is padded or truncated to exactly 512 tokens and
        # returned as PyTorch tensors.
        encode_options = dict(padding='max_length', max_length=512,
                              truncation=True, return_tensors="pt")
        self.texts = []
        for article in df['text']:
            self.texts.append(tokenizer(article, **encode_options))

    def classes(self):
        """Return the full list of integer labels, one per sample."""
        return self.labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def get_batch_labels(self, idx):
        """Return the label(s) selected by ``idx`` as a NumPy array."""
        selected = self.labels[idx]
        return np.array(selected)

    def get_batch_texts(self, idx):
        """Return the tokenizer output stored at ``idx``."""
        return self.texts[idx]

    def __getitem__(self, idx):
        """Return the ``(encoded_text, label)`` pair for ``idx``."""
        return self.get_batch_texts(idx), self.get_batch_labels(idx)


# Train & Evaluate BertClassifier

pretrain된 BertModel을 불러옵니다. 다른 간단한 층들도 같이 쌓아줍니다.

- bert-base-cased: 12-layer, 768-hidden, 12-self attention heads, 110M parameters. Trained on cased English text.


다른 종류의 pretrained model은 아래 링크에서 확인할 수 있습니다.

https://huggingface.co/transformers/v2.9.1/pretrained_models.html

In [None]:
class BertClassifier(nn.Module):
    """Pretrained BERT encoder followed by dropout and a linear classification head.

    ``forward`` returns raw, unnormalized class scores (logits), which is the
    input ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self, dropout=0.5, num_classes=5, model_name='bert-base-cased'):
        """Build the encoder and the classification head.

        Args:
            dropout: Drop probability applied to the pooled BERT output.
            num_classes: Number of target classes (output width of the head).
            model_name: Checkpoint name or path passed to
                ``BertModel.from_pretrained``.
        """
        super().__init__()

        self.bert = BertModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        # Read the encoder width from the loaded config instead of hard-coding
        # 768, so checkpoints with a different hidden size also work.
        self.linear = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_id, mask):
        """Return class logits for a batch.

        Args:
            input_id: Token ids passed to BERT as ``input_ids``.
            mask: Attention mask passed to BERT as ``attention_mask``.

        Returns:
            The output of the linear head (one score per class).
        """
        # With return_dict=False BertModel returns a tuple; the second element
        # is the pooled output, the first (per-token states) is not needed.
        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
        dropout_output = self.dropout(pooled_output)
        # Deliberately no ReLU on the head output: clamping logits at zero
        # removes the gradient for every class whose score is negative and
        # prevents the model from pushing wrong classes below zero.
        return self.linear(dropout_output)


In [None]:
def train(model, train_data, val_data, learning_rate, epochs, batch_size=2):
    """Fine-tune ``model`` and print train/validation metrics after every epoch.

    Args:
        model: Module called as ``model(input_id, mask)`` that returns class logits.
        train_data: Table of training rows, wrapped in ``Dataset``.
        val_data: Table of validation rows, wrapped in ``Dataset``.
        learning_rate: Learning rate for the Adam optimizer.
        epochs: Number of passes over the training data.
        batch_size: Samples per batch for both data loaders.

    Raises:
        ValueError: If ``train_data`` has no rows.
    """
    # Named *_dataset so the locals do not shadow this function's own name.
    train_dataset, val_dataset = Dataset(train_data), Dataset(val_data)
    if len(train_dataset) == 0:
        raise ValueError('train_data must contain at least one row')

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = Adam(model.parameters(), lr=learning_rate)

    for epoch_num in range(epochs):
        # Enable dropout for the optimization phase.
        model.train()
        total_acc_train = 0
        total_loss_train = 0.0

        for train_input, train_label in tqdm(train_dataloader):
            train_label = train_label.to(device).long()
            # The tokenizer output carries a size-1 dimension per sample;
            # squeeze(1) drops it so each tensor is [batch, sequence].
            mask = train_input['attention_mask'].squeeze(1).to(device)
            input_id = train_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)

            batch_loss = criterion(output, train_label)
            # The criterion returns the mean over the batch; weight it by the
            # batch size so that dividing by the sample count below yields a
            # true per-sample average (also correct for a short last batch).
            total_loss_train += batch_loss.item() * train_label.size(0)
            total_acc_train += (output.argmax(dim=1) == train_label).sum().item()

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        # Disable dropout so validation metrics are deterministic.
        model.eval()
        total_acc_val = 0
        total_loss_val = 0.0

        with torch.no_grad():
            for val_input, val_label in val_dataloader:
                val_label = val_label.to(device).long()
                mask = val_input['attention_mask'].squeeze(1).to(device)
                input_id = val_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)

                batch_loss = criterion(output, val_label)
                total_loss_val += batch_loss.item() * val_label.size(0)
                total_acc_val += (output.argmax(dim=1) == val_label).sum().item()

        n_train = len(train_dataset)
        n_val = len(val_dataset)
        # An empty validation set reports NaN instead of dividing by zero.
        val_loss = total_loss_val / n_val if n_val else float('nan')
        val_acc = total_acc_val / n_val if n_val else float('nan')

        print(
            f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / n_train: .3f} | '
            f'Train Accuracy: {total_acc_train / n_train: .3f} | '
            f'Val Loss: {val_loss: .3f} | '
            f'Val Accuracy: {val_acc: .3f}'
        )


In [None]:
def evaluate(model, test_data, batch_size=2):
    """Print and return the accuracy of ``model`` on ``test_data``.

    Args:
        model: Module called as ``model(input_id, mask)`` that returns class logits.
        test_data: Table of test rows, wrapped in ``Dataset``.
        batch_size: Samples per batch for the data loader.

    Returns:
        Fraction of correctly classified samples, or NaN when ``test_data``
        has no rows.
    """
    test_dataset = Dataset(test_data)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Switch dropout off while measuring, then put the model back in whatever
    # mode the caller left it in.
    was_training = model.training
    model.eval()

    total_acc_test = 0
    try:
        with torch.no_grad():
            for test_input, test_label in test_dataloader:
                test_label = test_label.to(device)
                # squeeze(1) drops the size-1 dimension each tokenized sample
                # carries, leaving [batch, sequence] tensors.
                mask = test_input['attention_mask'].squeeze(1).to(device)
                input_id = test_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)
                total_acc_test += (output.argmax(dim=1) == test_label).sum().item()
    finally:
        model.train(was_training)

    n_test = len(test_dataset)
    accuracy = total_acc_test / n_test if n_test else float('nan')
    print(f'Test Accuracy: {accuracy: .3f}')
    return accuracy


In [None]:
np.random.seed(112)
# Shuffle once with a fixed seed, then cut 80% / 10% / 10% by position.
# Plain iloc slices are used instead of np.split, which on a DataFrame goes
# through the deprecated DataFrame.swapaxes.
shuffled_df = df.sample(frac=1, random_state=42)
train_end, val_end = int(.8 * len(df)), int(.9 * len(df))
df_train = shuffled_df.iloc[:train_end]
df_val = shuffled_df.iloc[train_end:val_end]
df_test = shuffled_df.iloc[val_end:]

print(len(df_train),len(df_val), len(df_test))

1780 222 223


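mask shape 관련 에러가 발생했습니다. 토크나이저를 return_tensors="pt"로 호출하면 샘플마다 [1, sequence_length] 형상의 텐서가 반환되므로, DataLoader가 묶은 배치는 [batch_size, 1, sequence_length]가 됩니다. squeeze(1)로 크기 1인 차원을 제거해 BertModel이 기대하는 [batch_size, sequence_length] 형상으로 맞춰 해결했습니다.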

In [None]:
# Hyperparameters for the fine-tuning run; try raising EPOCHS.
EPOCHS = 2
LR = 1e-6

model = BertClassifier()
train(model, df_train, df_val, LR, EPOCHS)

  0%|          | 0/890 [00:00<?, ?it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 3/890 [00:01<04:14,  3.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 5/890 [00:01<02:44,  5.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 7/890 [00:01<02:07,  6.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 9/890 [00:01<01:50,  7.97it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 11/890 [00:01<01:42,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 13/890 [00:02<01:38,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 15/890 [00:02<01:37,  8.99it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 17/890 [00:02<01:36,  9.08it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 19/890 [00:02<01:35,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 21/890 [00:02<01:34,  9.17it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 23/890 [00:03<01:33,  9.26it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 25/890 [00:03<01:33,  9.28it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 27/890 [00:03<01:33,  9.22it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 29/890 [00:03<01:33,  9.22it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 31/890 [00:04<01:33,  9.19it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 33/890 [00:04<01:33,  9.15it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 35/890 [00:04<01:33,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 37/890 [00:04<01:33,  9.15it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 39/890 [00:04<01:32,  9.18it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 41/890 [00:05<01:31,  9.25it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 43/890 [00:05<01:31,  9.26it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 45/890 [00:05<01:31,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 47/890 [00:05<01:31,  9.18it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 49/890 [00:06<01:31,  9.20it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 51/890 [00:06<01:31,  9.14it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 53/890 [00:06<01:31,  9.14it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 55/890 [00:06<01:32,  9.05it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 57/890 [00:06<01:30,  9.20it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 59/890 [00:07<01:30,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 61/890 [00:07<01:29,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 63/890 [00:07<01:30,  9.12it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 65/890 [00:07<01:28,  9.28it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 67/890 [00:08<01:29,  9.22it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 69/890 [00:08<01:29,  9.15it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 71/890 [00:08<01:29,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 73/890 [00:08<01:29,  9.12it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 75/890 [00:08<01:29,  9.13it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 77/890 [00:09<01:29,  9.12it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 79/890 [00:09<01:28,  9.12it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 81/890 [00:09<01:28,  9.13it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 83/890 [00:09<01:28,  9.17it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 85/890 [00:09<01:27,  9.19it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 87/890 [00:10<01:27,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 89/890 [00:10<01:27,  9.19it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 91/890 [00:10<01:27,  9.13it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 93/890 [00:10<01:26,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 95/890 [00:11<01:26,  9.17it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 97/890 [00:11<01:26,  9.20it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 99/890 [00:11<01:26,  9.18it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 101/890 [00:11<01:27,  9.03it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 103/890 [00:11<01:25,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 105/890 [00:12<01:25,  9.19it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 107/890 [00:12<01:25,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 109/890 [00:12<01:25,  9.17it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 111/890 [00:12<01:25,  9.10it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 113/890 [00:13<01:25,  9.07it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 115/890 [00:13<01:25,  9.01it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 117/890 [00:13<01:23,  9.22it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 118/890 [00:13<01:37,  7.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 121/890 [00:13<01:26,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 123/890 [00:14<01:25,  9.01it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 125/890 [00:14<01:24,  9.08it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 127/890 [00:14<01:23,  9.09it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 129/890 [00:14<01:25,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 131/890 [00:15<01:22,  9.23it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 133/890 [00:15<01:22,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 135/890 [00:15<01:22,  9.13it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 137/890 [00:15<01:22,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 139/890 [00:15<01:22,  9.15it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 141/890 [00:16<01:22,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 143/890 [00:16<01:21,  9.13it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 145/890 [00:16<01:21,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 147/890 [00:16<01:22,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 149/890 [00:17<01:21,  9.09it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 151/890 [00:17<01:21,  9.08it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 153/890 [00:17<01:21,  9.05it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 155/890 [00:17<01:21,  9.02it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 157/890 [00:17<01:20,  9.10it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 159/890 [00:18<01:20,  9.09it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 161/890 [00:18<01:19,  9.15it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 163/890 [00:18<01:19,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 165/890 [00:18<01:19,  9.13it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 167/890 [00:18<01:18,  9.18it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 169/890 [00:19<01:18,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 171/890 [00:19<01:18,  9.19it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 173/890 [00:19<01:18,  9.14it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 175/890 [00:19<01:19,  9.03it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 177/890 [00:20<01:17,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 179/890 [00:20<01:18,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 181/890 [00:20<01:16,  9.31it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 183/890 [00:20<01:18,  9.02it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 185/890 [00:20<01:17,  9.07it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 187/890 [00:21<01:17,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 189/890 [00:21<01:17,  9.10it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 191/890 [00:21<01:16,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 193/890 [00:21<01:16,  9.10it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 195/890 [00:22<01:15,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 197/890 [00:22<01:14,  9.24it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 199/890 [00:22<01:14,  9.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 201/890 [00:22<01:14,  9.20it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 203/890 [00:22<01:14,  9.19it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 205/890 [00:23<01:14,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 207/890 [00:23<01:14,  9.15it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 209/890 [00:23<01:14,  9.15it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 211/890 [00:23<01:14,  9.14it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 213/890 [00:24<01:13,  9.15it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 215/890 [00:24<01:13,  9.14it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 217/890 [00:24<01:13,  9.12it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 219/890 [00:24<01:13,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 221/890 [00:24<01:13,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 223/890 [00:25<01:12,  9.14it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 225/890 [00:25<01:12,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 227/890 [00:25<01:12,  9.17it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 229/890 [00:25<01:12,  9.16it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 231/890 [00:25<01:12,  9.12it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 233/890 [00:26<01:12,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 235/890 [00:26<01:12,  9.05it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 237/890 [00:26<01:12,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 239/890 [00:26<01:11,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 241/890 [00:27<01:11,  9.07it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 243/890 [00:27<01:11,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 245/890 [00:27<01:11,  8.99it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 247/890 [00:27<01:10,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 249/890 [00:27<01:10,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 251/890 [00:28<01:10,  9.09it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 253/890 [00:28<01:10,  9.01it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 255/890 [00:28<01:09,  9.07it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 257/890 [00:28<01:09,  9.07it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 259/890 [00:29<01:09,  9.12it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 261/890 [00:29<01:09,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 263/890 [00:29<01:09,  9.00it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 265/890 [00:29<01:09,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 267/890 [00:29<01:08,  9.05it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 269/890 [00:30<01:09,  8.95it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 271/890 [00:30<01:08,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 273/890 [00:30<01:08,  9.00it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 275/890 [00:30<01:07,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 277/890 [00:31<01:07,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 279/890 [00:31<01:08,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 281/890 [00:31<01:07,  9.08it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 283/890 [00:31<01:06,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 285/890 [00:31<01:06,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 287/890 [00:32<01:06,  9.08it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 289/890 [00:32<01:06,  9.10it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 291/890 [00:32<01:06,  9.05it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 293/890 [00:32<01:06,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 295/890 [00:33<01:06,  9.01it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 297/890 [00:33<01:06,  8.94it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 299/890 [00:33<01:05,  9.02it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 301/890 [00:33<01:04,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 303/890 [00:33<01:04,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 305/890 [00:34<01:04,  9.05it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 307/890 [00:34<01:04,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 309/890 [00:34<01:04,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 311/890 [00:34<01:03,  9.07it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 313/890 [00:35<01:03,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 315/890 [00:35<01:03,  9.05it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 317/890 [00:35<01:03,  9.03it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 319/890 [00:35<01:03,  9.01it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 320/890 [00:35<01:02,  9.10it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 323/890 [00:36<01:09,  8.21it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 325/890 [00:36<01:07,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 327/890 [00:36<01:03,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 329/890 [00:36<01:02,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 331/890 [00:37<01:02,  9.00it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 333/890 [00:37<01:01,  9.03it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 335/890 [00:37<01:01,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 337/890 [00:37<01:01,  9.02it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 339/890 [00:37<01:00,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 341/890 [00:38<01:00,  9.03it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 343/890 [00:38<01:01,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 345/890 [00:38<01:00,  9.07it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 347/890 [00:38<01:00,  8.99it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 349/890 [00:39<00:59,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 351/890 [00:39<01:00,  8.95it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 353/890 [00:39<00:59,  8.97it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 355/890 [00:39<00:59,  8.95it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 357/890 [00:39<00:59,  9.00it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 359/890 [00:40<00:59,  8.99it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 361/890 [00:40<00:59,  8.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 363/890 [00:40<00:59,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 365/890 [00:40<00:58,  8.96it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 367/890 [00:41<00:58,  8.98it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 369/890 [00:41<00:57,  9.00it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 371/890 [00:41<00:57,  8.99it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 373/890 [00:41<00:57,  9.03it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 375/890 [00:41<00:56,  9.09it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 377/890 [00:42<00:56,  9.02it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 379/890 [00:42<00:56,  9.01it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 381/890 [00:42<00:57,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 383/890 [00:42<00:56,  8.97it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 385/890 [00:43<00:56,  8.98it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 387/890 [00:43<00:55,  9.07it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 389/890 [00:43<00:55,  9.02it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 391/890 [00:43<00:55,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 393/890 [00:44<00:55,  9.02it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 395/890 [00:44<00:55,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 397/890 [00:44<00:55,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 399/890 [00:44<00:54,  8.98it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 401/890 [00:44<00:54,  8.95it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 403/890 [00:45<00:54,  8.98it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 405/890 [00:45<00:54,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 407/890 [00:45<00:54,  8.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 409/890 [00:45<00:53,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 411/890 [00:46<00:53,  8.97it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 413/890 [00:46<00:53,  8.98it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 415/890 [00:46<00:53,  8.88it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 417/890 [00:46<00:53,  8.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 419/890 [00:46<00:52,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 421/890 [00:47<00:51,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 423/890 [00:47<00:52,  8.97it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 425/890 [00:47<00:53,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 427/890 [00:47<00:52,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 429/890 [00:48<00:51,  8.94it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 431/890 [00:48<00:51,  8.96it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 433/890 [00:48<00:50,  9.02it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 435/890 [00:48<00:51,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 437/890 [00:48<00:51,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 439/890 [00:49<00:50,  8.96it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 441/890 [00:49<00:50,  8.96it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 443/890 [00:49<00:49,  9.01it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 445/890 [00:49<00:50,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 447/890 [00:50<00:49,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 449/890 [00:50<00:49,  8.99it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 451/890 [00:50<00:49,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 453/890 [00:50<00:50,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 455/890 [00:50<00:48,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 457/890 [00:51<00:48,  8.90it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 459/890 [00:51<00:48,  8.95it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 461/890 [00:51<00:47,  8.94it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 463/890 [00:51<00:47,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 465/890 [00:52<00:47,  8.96it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 467/890 [00:52<00:47,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 469/890 [00:52<00:47,  8.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 471/890 [00:52<00:46,  8.94it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 473/890 [00:52<00:46,  8.99it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 475/890 [00:53<00:46,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 477/890 [00:53<00:46,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 479/890 [00:53<00:46,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 481/890 [00:53<00:45,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 483/890 [00:54<00:46,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 485/890 [00:54<00:45,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 487/890 [00:54<00:45,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 489/890 [00:54<00:44,  8.96it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 491/890 [00:54<00:44,  8.96it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 493/890 [00:55<00:44,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 495/890 [00:55<00:44,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 497/890 [00:55<00:44,  8.88it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 499/890 [00:55<00:44,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 501/890 [00:56<00:44,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 503/890 [00:56<00:43,  8.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 505/890 [00:56<00:43,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 507/890 [00:56<00:43,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 509/890 [00:57<00:42,  8.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 511/890 [00:57<00:42,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 513/890 [00:57<00:42,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 515/890 [00:57<00:42,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 517/890 [00:57<00:41,  8.95it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 519/890 [00:58<00:42,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 521/890 [00:58<00:42,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 523/890 [00:58<00:41,  8.95it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 525/890 [00:58<00:40,  8.95it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 527/890 [00:59<00:41,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 529/890 [00:59<00:40,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 531/890 [00:59<00:40,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 533/890 [00:59<00:40,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 535/890 [00:59<00:40,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 537/890 [01:00<00:39,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 539/890 [01:00<00:40,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 541/890 [01:00<00:39,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 543/890 [01:00<00:38,  8.90it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 545/890 [01:01<00:39,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 547/890 [01:01<00:39,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 549/890 [01:01<00:38,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 551/890 [01:01<00:39,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 553/890 [01:02<00:37,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 555/890 [01:02<00:38,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 557/890 [01:02<00:37,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 559/890 [01:02<00:37,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 561/890 [01:02<00:37,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 563/890 [01:03<00:36,  8.90it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 565/890 [01:03<00:36,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 567/890 [01:03<00:36,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 569/890 [01:03<00:36,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 571/890 [01:04<00:35,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 573/890 [01:04<00:36,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 575/890 [01:04<00:35,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 577/890 [01:04<00:36,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 579/890 [01:04<00:35,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 581/890 [01:05<00:34,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 583/890 [01:05<00:35,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 585/890 [01:05<00:34,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 587/890 [01:05<00:34,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 589/890 [01:06<00:34,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 591/890 [01:06<00:33,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 593/890 [01:06<00:32,  9.00it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 595/890 [01:06<00:33,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 597/890 [01:07<00:33,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 599/890 [01:07<00:33,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 601/890 [01:07<00:32,  8.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 603/890 [01:07<00:32,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 605/890 [01:07<00:32,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 607/890 [01:08<00:32,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 609/890 [01:08<00:32,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 611/890 [01:08<00:31,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 613/890 [01:08<00:31,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 615/890 [01:09<00:31,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 617/890 [01:09<00:31,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 619/890 [01:09<00:31,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 621/890 [01:09<00:30,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 623/890 [01:09<00:30,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 625/890 [01:10<00:29,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 627/890 [01:10<00:30,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 629/890 [01:10<00:29,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 631/890 [01:10<00:29,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 633/890 [01:11<00:29,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 635/890 [01:11<00:28,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 637/890 [01:11<00:29,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 639/890 [01:11<00:28,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 641/890 [01:12<00:28,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 643/890 [01:12<00:28,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 645/890 [01:12<00:28,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 647/890 [01:12<00:27,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 649/890 [01:12<00:27,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 651/890 [01:13<00:27,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 653/890 [01:13<00:26,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 655/890 [01:13<00:26,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 657/890 [01:13<00:26,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 659/890 [01:14<00:26,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 661/890 [01:14<00:26,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 663/890 [01:14<00:26,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 665/890 [01:14<00:25,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 666/890 [01:14<00:25,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 669/890 [01:15<00:26,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 671/890 [01:15<00:25,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 673/890 [01:15<00:25,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 675/890 [01:15<00:24,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 677/890 [01:16<00:24,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 679/890 [01:16<00:24,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 681/890 [01:16<00:23,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 683/890 [01:16<00:23,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 685/890 [01:17<00:23,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 687/890 [01:17<00:23,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 689/890 [01:17<00:22,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 691/890 [01:17<00:23,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 693/890 [01:18<00:22,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 695/890 [01:18<00:22,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 697/890 [01:18<00:21,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 699/890 [01:18<00:22,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 701/890 [01:18<00:21,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 703/890 [01:19<00:21,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 705/890 [01:19<00:21,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 707/890 [01:19<00:20,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 709/890 [01:19<00:20,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 711/890 [01:20<00:20,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 713/890 [01:20<00:20,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 715/890 [01:20<00:20,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 717/890 [01:20<00:20,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 719/890 [01:21<00:19,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 721/890 [01:21<00:19,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 723/890 [01:21<00:19,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 725/890 [01:21<00:19,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 727/890 [01:21<00:18,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 729/890 [01:22<00:18,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 731/890 [01:22<00:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 733/890 [01:22<00:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 735/890 [01:22<00:17,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 737/890 [01:23<00:17,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 739/890 [01:23<00:17,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 741/890 [01:23<00:17,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 743/890 [01:23<00:17,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 745/890 [01:24<00:16,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 747/890 [01:24<00:16,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 749/890 [01:24<00:16,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 751/890 [01:24<00:15,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 753/890 [01:24<00:15,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 755/890 [01:25<00:15,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 757/890 [01:25<00:15,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 759/890 [01:25<00:15,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 761/890 [01:25<00:14,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 763/890 [01:26<00:14,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 765/890 [01:26<00:14,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 767/890 [01:26<00:13,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 769/890 [01:26<00:14,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 771/890 [01:27<00:13,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 773/890 [01:27<00:13,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 775/890 [01:27<00:13,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 777/890 [01:27<00:12,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 779/890 [01:27<00:12,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 781/890 [01:28<00:12,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 783/890 [01:28<00:12,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 785/890 [01:28<00:12,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 787/890 [01:28<00:11,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 789/890 [01:29<00:11,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 791/890 [01:29<00:11,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 793/890 [01:29<00:11,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 795/890 [01:29<00:10,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 797/890 [01:29<00:10,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 799/890 [01:30<00:10,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 801/890 [01:30<00:10,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 803/890 [01:30<00:09,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 805/890 [01:30<00:09,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 807/890 [01:31<00:09,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 809/890 [01:31<00:09,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 811/890 [01:31<00:08,  8.88it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 813/890 [01:31<00:08,  8.90it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 815/890 [01:32<00:08,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 817/890 [01:32<00:08,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 819/890 [01:32<00:08,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 821/890 [01:32<00:07,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 823/890 [01:32<00:07,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 825/890 [01:33<00:07,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 827/890 [01:33<00:07,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 829/890 [01:33<00:06,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 831/890 [01:33<00:06,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 833/890 [01:34<00:06,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 835/890 [01:34<00:06,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 837/890 [01:34<00:06,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 839/890 [01:34<00:05,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 841/890 [01:35<00:05,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 843/890 [01:35<00:05,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 845/890 [01:35<00:05,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 847/890 [01:35<00:04,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 849/890 [01:35<00:04,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 851/890 [01:36<00:04,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 853/890 [01:36<00:04,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 855/890 [01:36<00:03,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 857/890 [01:36<00:03,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 859/890 [01:37<00:03,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 861/890 [01:37<00:03,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 863/890 [01:37<00:03,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 865/890 [01:37<00:02,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 867/890 [01:37<00:02,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 869/890 [01:38<00:02,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 871/890 [01:38<00:02,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 873/890 [01:38<00:01,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 875/890 [01:38<00:01,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 877/890 [01:39<00:01,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 879/890 [01:39<00:01,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 881/890 [01:39<00:01,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 883/890 [01:39<00:00,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 885/890 [01:40<00:00,  8.94it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 887/890 [01:40<00:00,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 889/890 [01:40<00:00,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|██████████| 890/890 [01:40<00:00,  8.85it/s]


input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torc

  0%|          | 0/890 [00:00<?, ?it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 4/890 [00:00<01:28,  9.97it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 6/890 [00:00<01:34,  9.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 8/890 [00:00<01:36,  9.11it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 10/890 [00:01<01:37,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 12/890 [00:01<01:37,  8.97it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 14/890 [00:01<01:39,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 16/890 [00:01<01:38,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 18/890 [00:01<01:38,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 20/890 [00:02<01:38,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 22/890 [00:02<01:38,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 24/890 [00:02<01:38,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 26/890 [00:02<01:38,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 28/890 [00:03<01:37,  8.88it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 30/890 [00:03<01:38,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 32/890 [00:03<01:38,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 34/890 [00:03<01:37,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 36/890 [00:04<01:37,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 38/890 [00:04<01:36,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 40/890 [00:04<01:35,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 42/890 [00:04<01:35,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 44/890 [00:04<01:35,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 46/890 [00:05<01:35,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 48/890 [00:05<01:36,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 50/890 [00:05<01:35,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 52/890 [00:05<01:33,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 54/890 [00:06<01:35,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 56/890 [00:06<01:36,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 58/890 [00:06<01:33,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 60/890 [00:06<01:34,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 62/890 [00:06<01:33,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 64/890 [00:07<01:35,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 66/890 [00:07<01:33,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 68/890 [00:07<01:34,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 70/890 [00:07<01:33,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 72/890 [00:08<01:32,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 74/890 [00:08<01:32,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 76/890 [00:08<01:32,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 78/890 [00:08<01:31,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 80/890 [00:09<01:32,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 82/890 [00:09<01:31,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 84/890 [00:09<01:30,  8.88it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 86/890 [00:09<01:31,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 88/890 [00:09<01:31,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 90/890 [00:10<01:30,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 92/890 [00:10<01:31,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 94/890 [00:10<01:29,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 96/890 [00:10<01:31,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 98/890 [00:11<01:29,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 100/890 [00:11<01:28,  8.90it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 102/890 [00:11<01:28,  8.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 104/890 [00:11<01:29,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 106/890 [00:11<01:28,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 108/890 [00:12<01:29,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 110/890 [00:12<01:28,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 112/890 [00:12<01:28,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 114/890 [00:12<01:28,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 116/890 [00:13<01:28,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 118/890 [00:13<01:28,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 120/890 [00:13<01:28,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 122/890 [00:13<01:26,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 124/890 [00:14<01:26,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 126/890 [00:14<01:26,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 128/890 [00:14<01:26,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 130/890 [00:14<01:25,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 132/890 [00:14<01:26,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 134/890 [00:15<01:25,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 136/890 [00:15<01:25,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 138/890 [00:15<01:26,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 140/890 [00:15<01:24,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 142/890 [00:16<01:24,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 144/890 [00:16<01:25,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 146/890 [00:16<01:24,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 148/890 [00:16<01:25,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 150/890 [00:16<01:23,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 152/890 [00:17<01:23,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 154/890 [00:17<01:24,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 156/890 [00:17<01:23,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 158/890 [00:17<01:24,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 160/890 [00:18<01:22,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 162/890 [00:18<01:23,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 164/890 [00:18<01:22,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 166/890 [00:18<01:22,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 168/890 [00:19<01:21,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 170/890 [00:19<01:21,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 172/890 [00:19<01:21,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 174/890 [00:19<01:21,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 176/890 [00:19<01:21,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 178/890 [00:20<01:21,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 180/890 [00:20<01:20,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 182/890 [00:20<01:20,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 184/890 [00:20<01:20,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 186/890 [00:21<01:20,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 188/890 [00:21<01:19,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 190/890 [00:21<01:19,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 192/890 [00:21<01:19,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 194/890 [00:21<01:18,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 196/890 [00:22<01:20,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 198/890 [00:22<01:18,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 200/890 [00:22<01:17,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 202/890 [00:22<01:17,  8.88it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 204/890 [00:23<01:17,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 206/890 [00:23<01:17,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 208/890 [00:23<01:17,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 210/890 [00:23<01:17,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 212/890 [00:24<01:16,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 214/890 [00:24<01:17,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 216/890 [00:24<01:16,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 218/890 [00:24<01:16,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 220/890 [00:24<01:16,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 222/890 [00:25<01:15,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 224/890 [00:25<01:16,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 226/890 [00:25<01:15,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 228/890 [00:25<01:15,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 230/890 [00:26<01:15,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 232/890 [00:26<01:15,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 234/890 [00:26<01:15,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 236/890 [00:26<01:14,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 238/890 [00:26<01:15,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 240/890 [00:27<01:15,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 242/890 [00:27<01:15,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 244/890 [00:27<01:14,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 246/890 [00:27<01:14,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 248/890 [00:28<01:13,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 250/890 [00:28<01:12,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 252/890 [00:28<01:13,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 254/890 [00:28<01:11,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 256/890 [00:29<01:12,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 258/890 [00:29<01:11,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 260/890 [00:29<01:11,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 262/890 [00:29<01:11,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 264/890 [00:29<01:11,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 266/890 [00:30<01:10,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 268/890 [00:30<01:11,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 270/890 [00:30<01:10,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 272/890 [00:30<01:09,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 274/890 [00:31<01:10,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 276/890 [00:31<01:09,  8.88it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 278/890 [00:31<01:09,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 280/890 [00:31<01:09,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 282/890 [00:31<01:08,  8.88it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 284/890 [00:32<01:07,  8.93it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 286/890 [00:32<01:08,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 288/890 [00:32<01:08,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 290/890 [00:32<01:07,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 292/890 [00:33<01:08,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 294/890 [00:33<01:08,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 296/890 [00:33<01:06,  8.88it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 298/890 [00:33<01:07,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 300/890 [00:34<01:06,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 302/890 [00:34<01:06,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 304/890 [00:34<01:06,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 306/890 [00:34<01:06,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 308/890 [00:34<01:06,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 310/890 [00:35<01:05,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 312/890 [00:35<01:05,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 314/890 [00:35<01:05,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 316/890 [00:35<01:05,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 318/890 [00:36<01:05,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 320/890 [00:36<01:04,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 322/890 [00:36<01:04,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 324/890 [00:36<01:04,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 326/890 [00:36<01:04,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 328/890 [00:37<01:03,  8.90it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 330/890 [00:37<01:04,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 332/890 [00:37<01:03,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 334/890 [00:37<01:03,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 336/890 [00:38<01:03,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 338/890 [00:38<01:02,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 340/890 [00:38<01:02,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 342/890 [00:38<01:02,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 344/890 [00:39<01:03,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 346/890 [00:39<01:01,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 348/890 [00:39<01:02,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 350/890 [00:39<01:01,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 352/890 [00:39<01:00,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 354/890 [00:40<01:01,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 356/890 [00:40<01:00,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 358/890 [00:40<01:01,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 360/890 [00:40<01:00,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 362/890 [00:41<01:00,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 364/890 [00:41<00:59,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 366/890 [00:41<00:59,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 368/890 [00:41<00:59,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 370/890 [00:42<00:59,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 372/890 [00:42<00:59,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 374/890 [00:42<00:59,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 376/890 [00:42<00:59,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 378/890 [00:42<00:58,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 380/890 [00:43<00:58,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 382/890 [00:43<00:58,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 384/890 [00:43<00:58,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 386/890 [00:43<00:57,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 388/890 [00:44<00:57,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 390/890 [00:44<00:57,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 392/890 [00:44<00:57,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 394/890 [00:44<00:56,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 396/890 [00:44<00:56,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 398/890 [00:45<00:56,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 400/890 [00:45<00:56,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 402/890 [00:45<00:55,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 404/890 [00:45<00:55,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 406/890 [00:46<00:55,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 408/890 [00:46<00:55,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 410/890 [00:46<00:55,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 412/890 [00:46<00:54,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 414/890 [00:47<00:54,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 416/890 [00:47<00:54,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 418/890 [00:47<00:54,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 420/890 [00:47<00:53,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 422/890 [00:47<00:54,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 424/890 [00:48<00:52,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 426/890 [00:48<00:53,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 428/890 [00:48<00:52,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 430/890 [00:48<00:52,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 432/890 [00:49<00:52,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 434/890 [00:49<00:52,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 436/890 [00:49<00:52,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 438/890 [00:49<00:52,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 440/890 [00:50<00:52,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 442/890 [00:50<00:51,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 444/890 [00:50<00:51,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 446/890 [00:50<00:51,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 448/890 [00:50<00:50,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 450/890 [00:51<00:51,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 452/890 [00:51<00:49,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 454/890 [00:51<00:50,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 456/890 [00:51<00:49,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 458/890 [00:52<00:49,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 460/890 [00:52<00:49,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 462/890 [00:52<00:48,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 464/890 [00:52<00:48,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 466/890 [00:53<00:48,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 468/890 [00:53<00:49,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 470/890 [00:53<00:48,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 472/890 [00:53<00:48,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 474/890 [00:53<00:47,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 476/890 [00:54<00:47,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 478/890 [00:54<00:47,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 480/890 [00:54<00:47,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 482/890 [00:54<00:46,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 484/890 [00:55<00:47,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 486/890 [00:55<00:46,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 488/890 [00:55<00:46,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 490/890 [00:55<00:46,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 492/890 [00:56<00:45,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 494/890 [00:56<00:46,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 496/890 [00:56<00:45,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 498/890 [00:56<00:45,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 500/890 [00:56<00:44,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 502/890 [00:57<00:44,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 504/890 [00:57<00:44,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 506/890 [00:57<00:43,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 508/890 [00:57<00:43,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 510/890 [00:58<00:43,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 512/890 [00:58<00:44,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 514/890 [00:58<00:43,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 516/890 [00:58<00:43,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 518/890 [00:59<00:42,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 520/890 [00:59<00:42,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 522/890 [00:59<00:42,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 524/890 [00:59<00:42,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 526/890 [00:59<00:42,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 528/890 [01:00<00:41,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 530/890 [01:00<00:41,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 532/890 [01:00<00:41,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 534/890 [01:00<00:41,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 536/890 [01:01<00:41,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 538/890 [01:01<00:40,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 540/890 [01:01<00:40,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 542/890 [01:01<00:40,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 544/890 [01:02<00:39,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 546/890 [01:02<00:39,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 548/890 [01:02<00:39,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 550/890 [01:02<00:39,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 552/890 [01:02<00:38,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 554/890 [01:03<00:38,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 556/890 [01:03<00:38,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 558/890 [01:03<00:38,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 560/890 [01:03<00:38,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 562/890 [01:04<00:37,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 564/890 [01:04<00:37,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 566/890 [01:04<00:37,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 568/890 [01:04<00:36,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 570/890 [01:05<00:36,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 572/890 [01:05<00:36,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 574/890 [01:05<00:37,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 576/890 [01:05<00:36,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 578/890 [01:05<00:35,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 580/890 [01:06<00:36,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 582/890 [01:06<00:35,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 584/890 [01:06<00:35,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 586/890 [01:06<00:34,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 588/890 [01:07<00:35,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 590/890 [01:07<00:34,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 592/890 [01:07<00:34,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 594/890 [01:07<00:34,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 596/890 [01:08<00:33,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 598/890 [01:08<00:33,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 600/890 [01:08<00:33,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 602/890 [01:08<00:33,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 604/890 [01:08<00:33,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 606/890 [01:09<00:33,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 608/890 [01:09<00:32,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 610/890 [01:09<00:32,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 612/890 [01:09<00:32,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 614/890 [01:10<00:31,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 616/890 [01:10<00:31,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 618/890 [01:10<00:31,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 620/890 [01:10<00:30,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 622/890 [01:11<00:31,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 624/890 [01:11<00:30,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 626/890 [01:11<00:30,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 628/890 [01:11<00:30,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 630/890 [01:11<00:29,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 632/890 [01:12<00:29,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 634/890 [01:12<00:29,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 636/890 [01:12<00:29,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 638/890 [01:12<00:28,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 640/890 [01:13<00:29,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 642/890 [01:13<00:28,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 644/890 [01:13<00:28,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 646/890 [01:13<00:28,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 648/890 [01:14<00:27,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 650/890 [01:14<00:28,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 652/890 [01:14<00:27,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 654/890 [01:14<00:27,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 656/890 [01:14<00:26,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 658/890 [01:15<00:26,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 660/890 [01:15<00:26,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 662/890 [01:15<00:26,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 664/890 [01:15<00:26,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 666/890 [01:16<00:25,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 668/890 [01:16<00:25,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 670/890 [01:16<00:25,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 672/890 [01:16<00:25,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 674/890 [01:17<00:25,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 676/890 [01:17<00:25,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 678/890 [01:17<00:24,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 680/890 [01:17<00:24,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 682/890 [01:18<00:24,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 684/890 [01:18<00:24,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 686/890 [01:18<00:23,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 688/890 [01:18<00:23,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 690/890 [01:18<00:22,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 692/890 [01:19<00:22,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 694/890 [01:19<00:22,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 696/890 [01:19<00:22,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 698/890 [01:19<00:21,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 700/890 [01:20<00:22,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 702/890 [01:20<00:21,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 704/890 [01:20<00:21,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 706/890 [01:20<00:20,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 708/890 [01:21<00:21,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 710/890 [01:21<00:20,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 712/890 [01:21<00:20,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 714/890 [01:21<00:20,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 716/890 [01:21<00:19,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 718/890 [01:22<00:20,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 720/890 [01:22<00:19,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 722/890 [01:22<00:19,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 724/890 [01:22<00:19,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 726/890 [01:23<00:18,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 728/890 [01:23<00:18,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 730/890 [01:23<00:18,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 732/890 [01:23<00:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 734/890 [01:24<00:17,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 736/890 [01:24<00:17,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 738/890 [01:24<00:17,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 740/890 [01:24<00:17,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 742/890 [01:24<00:17,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 744/890 [01:25<00:16,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 746/890 [01:25<00:16,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 748/890 [01:25<00:16,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 750/890 [01:25<00:16,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 752/890 [01:26<00:15,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 754/890 [01:26<00:15,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 756/890 [01:26<00:15,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 758/890 [01:26<00:15,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 760/890 [01:27<00:15,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 762/890 [01:27<00:14,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 764/890 [01:27<00:14,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 766/890 [01:27<00:14,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 768/890 [01:27<00:14,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 770/890 [01:28<00:13,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 772/890 [01:28<00:13,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 774/890 [01:28<00:13,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 776/890 [01:28<00:13,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 778/890 [01:29<00:12,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 780/890 [01:29<00:12,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 782/890 [01:29<00:12,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 784/890 [01:29<00:12,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 786/890 [01:30<00:11,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 788/890 [01:30<00:11,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 790/890 [01:30<00:11,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 792/890 [01:30<00:11,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 794/890 [01:30<00:11,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 796/890 [01:31<00:10,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 798/890 [01:31<00:10,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 800/890 [01:31<00:10,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 802/890 [01:31<00:10,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 804/890 [01:32<00:09,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 806/890 [01:32<00:09,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 808/890 [01:32<00:09,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 810/890 [01:32<00:09,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 812/890 [01:33<00:08,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 814/890 [01:33<00:08,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 816/890 [01:33<00:08,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 818/890 [01:33<00:08,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 820/890 [01:33<00:08,  8.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 822/890 [01:34<00:07,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 824/890 [01:34<00:07,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 826/890 [01:34<00:07,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 828/890 [01:34<00:07,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 830/890 [01:35<00:07,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 832/890 [01:35<00:06,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 834/890 [01:35<00:06,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 836/890 [01:35<00:06,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 838/890 [01:36<00:06,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 840/890 [01:36<00:05,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 842/890 [01:36<00:05,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 844/890 [01:36<00:05,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 846/890 [01:36<00:05,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 848/890 [01:37<00:04,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 850/890 [01:37<00:04,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 852/890 [01:37<00:04,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 854/890 [01:37<00:04,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 856/890 [01:38<00:03,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 858/890 [01:38<00:03,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 860/890 [01:38<00:03,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 862/890 [01:38<00:03,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 864/890 [01:39<00:02,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 866/890 [01:39<00:02,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 868/890 [01:39<00:02,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 870/890 [01:39<00:02,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 872/890 [01:39<00:02,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 874/890 [01:40<00:01,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 876/890 [01:40<00:01,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 878/890 [01:40<00:01,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 880/890 [01:40<00:01,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 882/890 [01:41<00:00,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 884/890 [01:41<00:00,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 886/890 [01:41<00:00,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 888/890 [01:41<00:00,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|██████████| 890/890 [01:42<00:00,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])





input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torc

In [None]:
# Evaluate the current `model` on the test DataFrame `df_test`.
# `evaluate` is not defined in this cell; it comes from elsewhere in the notebook.
# NOTE(review): the captured output of this cell is dominated by per-batch
# "input_id shape" / "mask shape" debug prints — presumably emitted by the
# model or data-loading code; consider removing them at the source.
evaluate(model, df_test)

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torc

In [None]:
# Training run with a freshly constructed classifier for 5 epochs.
EPOCHS = 5 # increased to 5 epochs for this run
# Rebinding `model` to a new BertClassifier instance replaces whatever `model`
# referred to before, so this run does not continue from the earlier object.
model = BertClassifier()
LR = 1e-6  # learning rate passed to train()

# `train` is defined elsewhere in the notebook; arguments are passed positionally
# as (model, training DataFrame, validation DataFrame, learning rate, epochs).
train(model, df_train, df_val, LR, EPOCHS)

  0%|          | 2/890 [00:00<01:08, 12.95it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 4/890 [00:00<01:27, 10.14it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 6/890 [00:00<01:32,  9.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 8/890 [00:00<01:35,  9.24it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 10/890 [00:01<01:37,  9.02it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 12/890 [00:01<01:38,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 14/890 [00:01<01:38,  8.94it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 16/890 [00:01<01:39,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 18/890 [00:01<01:38,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 20/890 [00:02<01:38,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 22/890 [00:02<01:40,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 24/890 [00:02<01:38,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 26/890 [00:02<01:37,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 28/890 [00:03<01:38,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 30/890 [00:03<01:37,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 32/890 [00:03<01:36,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 34/890 [00:03<01:37,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 36/890 [00:04<01:36,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 38/890 [00:04<01:36,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 40/890 [00:04<01:37,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 42/890 [00:04<01:35,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 44/890 [00:04<01:36,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 46/890 [00:05<01:35,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 48/890 [00:05<01:35,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 50/890 [00:05<01:36,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 52/890 [00:05<01:35,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 54/890 [00:06<01:35,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 56/890 [00:06<01:35,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 58/890 [00:06<01:35,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 60/890 [00:06<01:34,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 62/890 [00:06<01:33,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 64/890 [00:07<01:34,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 66/890 [00:07<01:33,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 68/890 [00:07<01:34,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 70/890 [00:07<01:33,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 72/890 [00:08<01:34,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 74/890 [00:08<01:33,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 76/890 [00:08<01:35,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 78/890 [00:08<01:32,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 80/890 [00:09<01:33,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 82/890 [00:09<01:32,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 84/890 [00:09<01:32,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 86/890 [00:09<01:32,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 88/890 [00:09<01:32,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 90/890 [00:10<01:31,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 92/890 [00:10<01:31,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 94/890 [00:10<01:31,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 96/890 [00:10<01:32,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 98/890 [00:11<01:31,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 100/890 [00:11<01:29,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 102/890 [00:11<01:29,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 104/890 [00:11<01:28,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 106/890 [00:12<01:30,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 108/890 [00:12<01:28,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 110/890 [00:12<01:29,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 112/890 [00:12<01:28,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 114/890 [00:12<01:30,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 116/890 [00:13<01:28,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 118/890 [00:13<01:29,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 120/890 [00:13<01:28,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 122/890 [00:13<01:30,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 124/890 [00:14<01:29,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 126/890 [00:14<01:27,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 128/890 [00:14<01:28,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 130/890 [00:14<01:27,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 132/890 [00:15<01:28,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 134/890 [00:15<01:28,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 136/890 [00:15<01:26,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 138/890 [00:15<01:26,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 140/890 [00:15<01:25,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 142/890 [00:16<01:27,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 144/890 [00:16<01:27,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 146/890 [00:16<01:26,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 148/890 [00:16<01:26,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 150/890 [00:17<01:27,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 152/890 [00:17<01:24,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 154/890 [00:17<01:26,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 156/890 [00:17<01:26,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 158/890 [00:18<01:24,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 160/890 [00:18<01:25,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 162/890 [00:18<01:25,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 164/890 [00:18<01:24,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 166/890 [00:18<01:24,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 168/890 [00:19<01:25,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 170/890 [00:19<01:25,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 172/890 [00:19<01:25,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 174/890 [00:19<01:23,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 176/890 [00:20<01:22,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 178/890 [00:20<01:21,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 180/890 [00:20<01:22,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 182/890 [00:20<01:23,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 184/890 [00:21<01:23,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 186/890 [00:21<01:23,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 188/890 [00:21<01:24,  8.32it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 190/890 [00:21<01:21,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 192/890 [00:22<01:22,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 194/890 [00:22<01:22,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 196/890 [00:22<01:21,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 198/890 [00:22<01:20,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 200/890 [00:22<01:20,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 202/890 [00:23<01:21,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 204/890 [00:23<01:21,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 206/890 [00:23<01:19,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 208/890 [00:23<01:19,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 210/890 [00:24<01:20,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 212/890 [00:24<01:20,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 214/890 [00:24<01:20,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 216/890 [00:24<01:19,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 218/890 [00:25<01:18,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 220/890 [00:25<01:19,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 222/890 [00:25<01:18,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 224/890 [00:25<01:19,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 226/890 [00:26<01:19,  8.37it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 228/890 [00:26<01:19,  8.34it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 230/890 [00:26<01:18,  8.37it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 232/890 [00:26<01:17,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 234/890 [00:27<01:17,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 236/890 [00:27<01:17,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 238/890 [00:27<01:16,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 240/890 [00:27<01:15,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 242/890 [00:27<01:16,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 244/890 [00:28<01:16,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 246/890 [00:28<01:16,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 248/890 [00:28<01:17,  8.32it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 250/890 [00:28<01:16,  8.34it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 252/890 [00:29<01:14,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 254/890 [00:29<01:14,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 256/890 [00:29<01:14,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 258/890 [00:29<01:14,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 260/890 [00:30<01:13,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 262/890 [00:30<01:13,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 264/890 [00:30<01:13,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 266/890 [00:30<01:14,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 268/890 [00:31<01:14,  8.34it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 270/890 [00:31<01:14,  8.27it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 272/890 [00:31<01:13,  8.35it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 274/890 [00:31<01:12,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 276/890 [00:31<01:12,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 278/890 [00:32<01:11,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 280/890 [00:32<01:12,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 282/890 [00:32<01:12,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 284/890 [00:32<01:10,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 286/890 [00:33<01:11,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 288/890 [00:33<01:11,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 290/890 [00:33<01:10,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 292/890 [00:33<01:09,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 294/890 [00:34<01:09,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 296/890 [00:34<01:10,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 298/890 [00:34<01:10,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 300/890 [00:34<01:10,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 302/890 [00:35<01:07,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 304/890 [00:35<01:08,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 306/890 [00:35<01:09,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 308/890 [00:35<01:09,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 310/890 [00:35<01:09,  8.35it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 312/890 [00:36<01:07,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 314/890 [00:36<01:08,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 316/890 [00:36<01:07,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 318/890 [00:36<01:07,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 320/890 [00:37<01:06,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 322/890 [00:37<01:05,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 324/890 [00:37<01:06,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 326/890 [00:37<01:06,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 328/890 [00:38<01:06,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 330/890 [00:38<01:05,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 332/890 [00:38<01:05,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 334/890 [00:38<01:05,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 336/890 [00:39<01:05,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 338/890 [00:39<01:05,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 340/890 [00:39<01:04,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 342/890 [00:39<01:04,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 344/890 [00:39<01:04,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 346/890 [00:40<01:02,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 348/890 [00:40<01:03,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 350/890 [00:40<01:02,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 352/890 [00:40<01:02,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 354/890 [00:41<01:02,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 356/890 [00:41<01:03,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 358/890 [00:41<01:01,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 360/890 [00:41<01:01,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 362/890 [00:42<01:02,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 364/890 [00:42<01:00,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 366/890 [00:42<01:00,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 368/890 [00:42<01:01,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 370/890 [00:42<00:59,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 372/890 [00:43<01:00,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 374/890 [00:43<01:00,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 376/890 [00:43<01:00,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 378/890 [00:43<00:58,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 380/890 [00:44<00:59,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 382/890 [00:44<00:58,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 384/890 [00:44<00:59,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 386/890 [00:44<00:59,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 388/890 [00:45<00:58,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 390/890 [00:45<00:57,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 392/890 [00:45<00:57,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 394/890 [00:45<00:57,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 396/890 [00:46<00:57,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 398/890 [00:46<00:56,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 400/890 [00:46<00:57,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 402/890 [00:46<00:55,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 404/890 [00:46<00:56,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 406/890 [00:47<00:55,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 408/890 [00:47<00:55,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 410/890 [00:47<00:56,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 412/890 [00:47<00:55,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 414/890 [00:48<00:55,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 416/890 [00:48<00:55,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 418/890 [00:48<00:54,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 420/890 [00:48<00:54,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 422/890 [00:49<00:54,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 424/890 [00:49<00:53,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 426/890 [00:49<00:54,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 428/890 [00:49<00:53,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 429/890 [00:49<00:52,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 432/890 [00:50<00:56,  8.18it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 434/890 [00:50<00:54,  8.33it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 436/890 [00:50<00:53,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 438/890 [00:50<00:53,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 440/890 [00:51<00:52,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 442/890 [00:51<00:52,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 444/890 [00:51<00:52,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 446/890 [00:51<00:52,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 448/890 [00:52<00:50,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 450/890 [00:52<00:51,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 452/890 [00:52<00:51,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 454/890 [00:52<00:50,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 456/890 [00:53<00:50,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 458/890 [00:53<00:50,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 460/890 [00:53<00:49,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 462/890 [00:53<00:49,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 464/890 [00:53<00:49,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 466/890 [00:54<00:49,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 468/890 [00:54<00:48,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 470/890 [00:54<00:48,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 472/890 [00:54<00:48,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 474/890 [00:55<00:48,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 476/890 [00:55<00:48,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 478/890 [00:55<00:47,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 480/890 [00:55<00:47,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 482/890 [00:56<00:46,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 484/890 [00:56<00:47,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 486/890 [00:56<00:47,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 488/890 [00:56<00:46,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 490/890 [00:56<00:46,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 492/890 [00:57<00:45,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 494/890 [00:57<00:45,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 496/890 [00:57<00:45,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 498/890 [00:57<00:45,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 500/890 [00:58<00:45,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 502/890 [00:58<00:44,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 504/890 [00:58<00:45,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 506/890 [00:58<00:44,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 508/890 [00:59<00:44,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 510/890 [00:59<00:43,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 512/890 [00:59<00:43,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 514/890 [00:59<00:43,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 516/890 [00:59<00:42,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 518/890 [01:00<00:43,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 520/890 [01:00<00:43,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 522/890 [01:00<00:42,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 524/890 [01:00<00:42,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 526/890 [01:01<00:41,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 528/890 [01:01<00:41,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 530/890 [01:01<00:41,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 532/890 [01:01<00:41,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 534/890 [01:02<00:40,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 536/890 [01:02<00:41,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 538/890 [01:02<00:40,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 540/890 [01:02<00:40,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 542/890 [01:02<00:39,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 544/890 [01:03<00:40,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 546/890 [01:03<00:39,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 548/890 [01:03<00:39,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 550/890 [01:03<00:39,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 552/890 [01:04<00:39,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 554/890 [01:04<00:38,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 556/890 [01:04<00:38,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 558/890 [01:04<00:38,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 560/890 [01:05<00:37,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 562/890 [01:05<00:38,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 564/890 [01:05<00:37,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 566/890 [01:05<00:37,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 568/890 [01:05<00:37,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 570/890 [01:06<00:37,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 572/890 [01:06<00:36,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 574/890 [01:06<00:36,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 576/890 [01:06<00:36,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 578/890 [01:07<00:35,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 580/890 [01:07<00:36,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 582/890 [01:07<00:35,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 584/890 [01:07<00:35,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 586/890 [01:08<00:34,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 588/890 [01:08<00:34,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 590/890 [01:08<00:34,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 592/890 [01:08<00:34,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 594/890 [01:08<00:34,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 596/890 [01:09<00:33,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 598/890 [01:09<00:33,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 600/890 [01:09<00:33,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 602/890 [01:09<00:32,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 604/890 [01:10<00:32,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 606/890 [01:10<00:32,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 608/890 [01:10<00:32,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 610/890 [01:10<00:32,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 612/890 [01:11<00:31,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 614/890 [01:11<00:31,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 616/890 [01:11<00:31,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 618/890 [01:11<00:31,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 620/890 [01:11<00:30,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 622/890 [01:12<00:30,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 624/890 [01:12<00:30,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 626/890 [01:12<00:30,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 628/890 [01:12<00:30,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 630/890 [01:13<00:30,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 632/890 [01:13<00:29,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 634/890 [01:13<00:28,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 636/890 [01:13<00:29,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 638/890 [01:14<00:28,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 640/890 [01:14<00:28,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 642/890 [01:14<00:28,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 644/890 [01:14<00:28,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 646/890 [01:14<00:27,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 648/890 [01:15<00:27,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 650/890 [01:15<00:27,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 652/890 [01:15<00:27,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 654/890 [01:15<00:26,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 656/890 [01:16<00:26,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 658/890 [01:16<00:26,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 660/890 [01:16<00:26,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 662/890 [01:16<00:26,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 664/890 [01:17<00:25,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 666/890 [01:17<00:25,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 668/890 [01:17<00:25,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 670/890 [01:17<00:24,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 672/890 [01:17<00:24,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 674/890 [01:18<00:24,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 676/890 [01:18<00:24,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 678/890 [01:18<00:24,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 680/890 [01:18<00:24,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 682/890 [01:19<00:23,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 684/890 [01:19<00:23,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 686/890 [01:19<00:23,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 688/890 [01:19<00:23,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 690/890 [01:19<00:22,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 692/890 [01:20<00:22,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 694/890 [01:20<00:22,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 696/890 [01:20<00:22,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 698/890 [01:20<00:21,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 700/890 [01:21<00:22,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 702/890 [01:21<00:21,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 704/890 [01:21<00:21,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 706/890 [01:21<00:21,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 708/890 [01:22<00:20,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 710/890 [01:22<00:20,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 712/890 [01:22<00:20,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 714/890 [01:22<00:20,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 716/890 [01:22<00:20,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 718/890 [01:23<00:19,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 720/890 [01:23<00:19,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 722/890 [01:23<00:19,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 724/890 [01:23<00:18,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 726/890 [01:24<00:18,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 728/890 [01:24<00:18,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 730/890 [01:24<00:18,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 732/890 [01:24<00:17,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 734/890 [01:25<00:18,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 736/890 [01:25<00:17,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 738/890 [01:25<00:17,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 740/890 [01:25<00:16,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 742/890 [01:25<00:17,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 744/890 [01:26<00:16,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 746/890 [01:26<00:16,  8.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 748/890 [01:26<00:16,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 750/890 [01:26<00:15,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 752/890 [01:27<00:15,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 754/890 [01:27<00:15,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 756/890 [01:27<00:15,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 758/890 [01:27<00:15,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 760/890 [01:27<00:14,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 762/890 [01:28<00:14,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 764/890 [01:28<00:14,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 766/890 [01:28<00:13,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 768/890 [01:28<00:13,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 770/890 [01:29<00:13,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 772/890 [01:29<00:13,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 774/890 [01:29<00:13,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 776/890 [01:29<00:13,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 778/890 [01:30<00:12,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 780/890 [01:30<00:12,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 782/890 [01:30<00:12,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 784/890 [01:30<00:12,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 786/890 [01:30<00:11,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 788/890 [01:31<00:11,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 790/890 [01:31<00:11,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 792/890 [01:31<00:11,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 794/890 [01:31<00:11,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 796/890 [01:32<00:10,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 798/890 [01:32<00:10,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 800/890 [01:32<00:10,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 802/890 [01:32<00:10,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 804/890 [01:33<00:09,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 806/890 [01:33<00:09,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 808/890 [01:33<00:09,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 810/890 [01:33<00:09,  8.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 812/890 [01:33<00:08,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 814/890 [01:34<00:08,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 816/890 [01:34<00:08,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 818/890 [01:34<00:08,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 820/890 [01:34<00:08,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 822/890 [01:35<00:07,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 824/890 [01:35<00:07,  8.84it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 826/890 [01:35<00:07,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 828/890 [01:35<00:07,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 830/890 [01:35<00:06,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 832/890 [01:36<00:06,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 834/890 [01:36<00:06,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 836/890 [01:36<00:06,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 838/890 [01:36<00:05,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 840/890 [01:37<00:05,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 842/890 [01:37<00:05,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 844/890 [01:37<00:05,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 846/890 [01:37<00:05,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 848/890 [01:38<00:04,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 850/890 [01:38<00:04,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 852/890 [01:38<00:04,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 854/890 [01:38<00:04,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 856/890 [01:38<00:03,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 858/890 [01:39<00:03,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 860/890 [01:39<00:03,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 862/890 [01:39<00:03,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 864/890 [01:39<00:02,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 866/890 [01:40<00:02,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 868/890 [01:40<00:02,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 870/890 [01:40<00:02,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 872/890 [01:40<00:02,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 874/890 [01:41<00:01,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 876/890 [01:41<00:01,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 878/890 [01:41<00:01,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 880/890 [01:41<00:01,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 882/890 [01:41<00:00,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 884/890 [01:42<00:00,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 886/890 [01:42<00:00,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 888/890 [01:42<00:00,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|██████████| 890/890 [01:42<00:00,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])





input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torc

  0%|          | 0/890 [00:00<?, ?it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 2/890 [00:00<01:15, 11.83it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 4/890 [00:00<01:29,  9.89it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 6/890 [00:00<01:35,  9.25it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 7/890 [00:00<01:36,  9.19it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 8/890 [00:00<01:37,  9.03it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 9/890 [00:00<01:39,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 10/890 [00:01<01:40,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 11/890 [00:01<01:38,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 12/890 [00:01<01:39,  8.85it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 13/890 [00:01<01:40,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 14/890 [00:01<01:39,  8.82it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 15/890 [00:01<01:39,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 16/890 [00:01<01:40,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 17/890 [00:01<01:40,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 18/890 [00:02<01:39,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 19/890 [00:02<01:39,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 20/890 [00:02<01:39,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 21/890 [00:02<01:39,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 22/890 [00:02<01:38,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 23/890 [00:02<01:39,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 24/890 [00:02<01:40,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 25/890 [00:02<01:39,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 26/890 [00:02<01:39,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 27/890 [00:03<01:39,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 28/890 [00:03<01:39,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 29/890 [00:03<01:40,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 30/890 [00:03<01:38,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 31/890 [00:03<01:38,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 32/890 [00:03<01:38,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 33/890 [00:03<01:39,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 34/890 [00:03<01:37,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 35/890 [00:03<01:37,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 36/890 [00:04<01:39,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 37/890 [00:04<01:38,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 38/890 [00:04<01:37,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 39/890 [00:04<01:37,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 40/890 [00:04<01:37,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 41/890 [00:04<01:38,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 42/890 [00:04<01:36,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 43/890 [00:04<01:36,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 45/890 [00:05<01:42,  8.27it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 46/890 [00:05<01:40,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 47/890 [00:05<01:39,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 48/890 [00:05<01:37,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 49/890 [00:05<01:37,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 50/890 [00:05<01:37,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 51/890 [00:05<01:37,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 52/890 [00:05<01:36,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 53/890 [00:06<01:36,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 54/890 [00:06<01:37,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 55/890 [00:06<01:37,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 56/890 [00:06<01:36,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 57/890 [00:06<01:35,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 58/890 [00:06<01:35,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 59/890 [00:06<01:37,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 60/890 [00:06<01:35,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 61/890 [00:06<01:34,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 62/890 [00:07<01:35,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 63/890 [00:07<01:35,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 64/890 [00:07<01:35,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 65/890 [00:07<01:34,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 66/890 [00:07<01:34,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 67/890 [00:07<01:34,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 68/890 [00:07<01:35,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 69/890 [00:07<01:34,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 70/890 [00:08<01:34,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 71/890 [00:08<01:34,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 72/890 [00:08<01:34,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 73/890 [00:08<01:33,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 74/890 [00:08<01:33,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 75/890 [00:08<01:34,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 76/890 [00:08<01:36,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 77/890 [00:08<01:34,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 78/890 [00:08<01:33,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 79/890 [00:09<01:33,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 80/890 [00:09<01:33,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 81/890 [00:09<01:34,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 82/890 [00:09<01:33,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 83/890 [00:09<01:32,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 84/890 [00:09<01:32,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 85/890 [00:09<01:33,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 86/890 [00:09<01:33,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 87/890 [00:09<01:32,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 88/890 [00:10<01:32,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 89/890 [00:10<01:33,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 90/890 [00:10<01:34,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 91/890 [00:10<01:32,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 92/890 [00:10<01:31,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 93/890 [00:10<01:31,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 94/890 [00:10<01:32,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 95/890 [00:10<01:32,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 96/890 [00:11<01:32,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 97/890 [00:11<01:32,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 98/890 [00:11<01:30,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 99/890 [00:11<01:31,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 100/890 [00:11<01:31,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 101/890 [00:11<01:32,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 102/890 [00:11<01:30,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 103/890 [00:11<01:30,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 104/890 [00:11<01:30,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 105/890 [00:12<01:30,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 106/890 [00:12<01:29,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 107/890 [00:12<01:29,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 108/890 [00:12<01:30,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 109/890 [00:12<01:30,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 110/890 [00:12<01:30,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 111/890 [00:12<01:29,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 112/890 [00:12<01:30,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 113/890 [00:12<01:30,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 114/890 [00:13<01:30,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 115/890 [00:13<01:30,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 116/890 [00:13<01:28,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 117/890 [00:13<01:28,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 118/890 [00:13<01:29,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 119/890 [00:13<01:29,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 120/890 [00:13<01:27,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 121/890 [00:13<01:28,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 122/890 [00:14<01:28,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 123/890 [00:14<01:29,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 124/890 [00:14<01:29,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 125/890 [00:14<01:27,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 126/890 [00:14<01:27,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 127/890 [00:14<01:28,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 128/890 [00:14<01:28,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 129/890 [00:14<01:27,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 130/890 [00:14<01:28,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 131/890 [00:15<01:27,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 132/890 [00:15<01:27,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 133/890 [00:15<01:26,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 134/890 [00:15<01:26,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 135/890 [00:15<01:27,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 136/890 [00:15<01:27,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 137/890 [00:15<01:26,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 138/890 [00:15<01:26,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 139/890 [00:15<01:26,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 140/890 [00:16<01:27,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 141/890 [00:16<01:26,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 142/890 [00:16<01:25,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 143/890 [00:16<01:25,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 144/890 [00:16<01:26,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 145/890 [00:16<01:26,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 146/890 [00:16<01:25,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 147/890 [00:16<01:25,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 148/890 [00:17<01:25,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 149/890 [00:17<01:25,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 150/890 [00:17<01:24,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 151/890 [00:17<01:24,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 152/890 [00:17<01:25,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 153/890 [00:17<01:26,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 154/890 [00:17<01:25,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 155/890 [00:17<01:26,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 156/890 [00:17<01:23,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 157/890 [00:18<01:23,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 158/890 [00:18<01:24,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 159/890 [00:18<01:23,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 160/890 [00:18<01:24,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 161/890 [00:18<01:23,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 162/890 [00:18<01:24,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 163/890 [00:18<01:24,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 164/890 [00:18<01:24,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 165/890 [00:18<01:22,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 166/890 [00:19<01:23,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 167/890 [00:19<01:23,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 168/890 [00:19<01:23,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 169/890 [00:19<01:22,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 170/890 [00:19<01:23,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 171/890 [00:19<01:24,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 172/890 [00:19<01:23,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 173/890 [00:19<01:23,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 174/890 [00:20<01:22,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 175/890 [00:20<01:22,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 176/890 [00:20<01:22,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 177/890 [00:20<01:22,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 178/890 [00:20<01:21,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 179/890 [00:20<01:21,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 180/890 [00:20<01:22,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 181/890 [00:20<01:22,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 182/890 [00:20<01:21,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 183/890 [00:21<01:20,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 184/890 [00:21<01:21,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 185/890 [00:21<01:21,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 186/890 [00:21<01:22,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 187/890 [00:21<01:20,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 188/890 [00:21<01:20,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 189/890 [00:21<01:21,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 190/890 [00:21<01:20,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 191/890 [00:21<01:19,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 192/890 [00:22<01:19,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 193/890 [00:22<01:20,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 194/890 [00:22<01:20,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 195/890 [00:22<01:19,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 196/890 [00:22<01:19,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 197/890 [00:22<01:20,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 198/890 [00:22<01:21,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 199/890 [00:22<01:20,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 200/890 [00:23<01:20,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 201/890 [00:23<01:19,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 202/890 [00:23<01:19,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 203/890 [00:23<01:19,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 204/890 [00:23<01:19,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 205/890 [00:23<01:19,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 206/890 [00:23<01:18,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 207/890 [00:23<01:19,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 208/890 [00:23<01:19,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 209/890 [00:24<01:20,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 210/890 [00:24<01:18,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 211/890 [00:24<01:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 212/890 [00:24<01:18,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 213/890 [00:24<01:19,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 214/890 [00:24<01:19,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 215/890 [00:24<01:18,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 216/890 [00:24<01:17,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 217/890 [00:25<01:17,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 218/890 [00:25<01:18,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 219/890 [00:25<01:18,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 220/890 [00:25<01:18,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 221/890 [00:25<01:17,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 222/890 [00:25<01:16,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 223/890 [00:25<01:17,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 224/890 [00:25<01:17,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 225/890 [00:25<01:17,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 226/890 [00:26<01:16,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 227/890 [00:26<01:16,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 228/890 [00:26<01:16,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 229/890 [00:26<01:17,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 230/890 [00:26<01:16,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 231/890 [00:26<01:15,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 232/890 [00:26<01:15,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 233/890 [00:26<01:16,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 234/890 [00:26<01:16,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 235/890 [00:27<01:15,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 236/890 [00:27<01:15,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 237/890 [00:27<01:15,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 238/890 [00:27<01:15,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 239/890 [00:27<01:15,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 240/890 [00:27<01:14,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 241/890 [00:27<01:14,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 242/890 [00:27<01:14,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 243/890 [00:28<01:15,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 244/890 [00:28<01:14,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 245/890 [00:28<01:13,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 246/890 [00:28<01:14,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 247/890 [00:28<01:14,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 248/890 [00:28<01:14,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 249/890 [00:28<01:13,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 250/890 [00:28<01:13,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 251/890 [00:28<01:13,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 252/890 [00:29<01:14,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 253/890 [00:29<01:14,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 254/890 [00:29<01:13,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 255/890 [00:29<01:13,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 256/890 [00:29<01:14,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 257/890 [00:29<01:13,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 258/890 [00:29<01:13,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 259/890 [00:29<01:12,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 260/890 [00:29<01:12,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 261/890 [00:30<01:12,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 262/890 [00:30<01:12,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 263/890 [00:30<01:11,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 264/890 [00:30<01:11,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 265/890 [00:30<01:12,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 266/890 [00:30<01:12,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 267/890 [00:30<01:11,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 268/890 [00:30<01:12,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 269/890 [00:31<01:11,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 270/890 [00:31<01:12,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 271/890 [00:31<01:12,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 272/890 [00:31<01:11,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 273/890 [00:31<01:11,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 274/890 [00:31<01:11,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 275/890 [00:31<01:11,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 276/890 [00:31<01:11,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 277/890 [00:31<01:10,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 278/890 [00:32<01:09,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 279/890 [00:32<01:10,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 280/890 [00:32<01:10,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 281/890 [00:32<01:09,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 282/890 [00:32<01:09,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 283/890 [00:32<01:10,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 284/890 [00:32<01:10,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 285/890 [00:32<01:11,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 286/890 [00:32<01:10,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 287/890 [00:33<01:09,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 288/890 [00:33<01:09,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 289/890 [00:33<01:09,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 290/890 [00:33<01:09,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 291/890 [00:33<01:08,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 292/890 [00:33<01:09,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 293/890 [00:33<01:09,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 294/890 [00:33<01:09,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 295/890 [00:34<01:09,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 296/890 [00:34<01:09,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 297/890 [00:34<01:08,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 298/890 [00:34<01:08,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 299/890 [00:34<01:08,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 300/890 [00:34<01:07,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 301/890 [00:34<01:07,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 302/890 [00:34<01:07,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 303/890 [00:34<01:08,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 304/890 [00:35<01:07,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 305/890 [00:35<01:07,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 306/890 [00:35<01:06,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 307/890 [00:35<01:07,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 308/890 [00:35<01:07,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 309/890 [00:35<01:07,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 310/890 [00:35<01:06,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 311/890 [00:35<01:06,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 312/890 [00:36<01:06,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 313/890 [00:36<01:07,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 314/890 [00:36<01:06,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 315/890 [00:36<01:05,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 316/890 [00:36<01:06,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 317/890 [00:36<01:06,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 318/890 [00:36<01:06,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 319/890 [00:36<01:07,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 320/890 [00:36<01:07,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 321/890 [00:37<01:06,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 322/890 [00:37<01:05,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 323/890 [00:37<01:06,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 324/890 [00:37<01:05,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 325/890 [00:37<01:05,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 326/890 [00:37<01:05,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 327/890 [00:37<01:04,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 328/890 [00:37<01:04,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 329/890 [00:37<01:05,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 330/890 [00:38<01:05,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 331/890 [00:38<01:04,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 332/890 [00:38<01:04,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 333/890 [00:38<01:04,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 334/890 [00:38<01:04,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 335/890 [00:38<01:04,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 336/890 [00:38<01:04,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 337/890 [00:38<01:03,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 338/890 [00:39<01:04,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 339/890 [00:39<01:04,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 340/890 [00:39<01:05,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 341/890 [00:39<01:02,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 342/890 [00:39<01:02,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 343/890 [00:39<01:03,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 344/890 [00:39<01:03,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 345/890 [00:39<01:03,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 346/890 [00:39<01:03,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 347/890 [00:40<01:02,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 348/890 [00:40<01:02,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 349/890 [00:40<01:03,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 350/890 [00:40<01:03,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 351/890 [00:40<01:03,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 352/890 [00:40<01:02,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 353/890 [00:40<01:01,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 354/890 [00:40<01:02,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 355/890 [00:41<01:02,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 356/890 [00:41<01:01,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 357/890 [00:41<01:01,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 358/890 [00:41<01:01,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 359/890 [00:41<01:02,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 360/890 [00:41<01:02,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 361/890 [00:41<01:02,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 362/890 [00:41<01:01,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 363/890 [00:41<01:00,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 364/890 [00:42<01:00,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 365/890 [00:42<01:00,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 366/890 [00:42<01:01,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 367/890 [00:42<01:01,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 368/890 [00:42<01:01,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 369/890 [00:42<01:00,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 370/890 [00:42<01:00,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 371/890 [00:42<01:00,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 372/890 [00:42<01:00,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 373/890 [00:43<01:00,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 374/890 [00:43<01:00,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 375/890 [00:43<01:01,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 376/890 [00:43<00:59,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 377/890 [00:43<00:58,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 378/890 [00:43<00:59,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 379/890 [00:43<00:58,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 380/890 [00:43<00:58,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 381/890 [00:44<00:58,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 382/890 [00:44<00:58,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 383/890 [00:44<00:58,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 384/890 [00:44<00:57,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 385/890 [00:44<00:57,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 386/890 [00:44<00:58,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 387/890 [00:44<00:57,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 388/890 [00:44<00:57,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 389/890 [00:44<00:57,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 390/890 [00:45<00:57,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 391/890 [00:45<00:59,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 392/890 [00:45<00:57,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 393/890 [00:45<00:57,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 394/890 [00:45<00:57,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 395/890 [00:45<00:57,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 396/890 [00:45<00:57,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 397/890 [00:45<00:57,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 398/890 [00:45<00:56,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 399/890 [00:46<00:57,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 400/890 [00:46<00:56,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 401/890 [00:46<00:56,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 402/890 [00:46<00:57,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 403/890 [00:46<00:57,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 404/890 [00:46<00:56,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 405/890 [00:46<00:55,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 406/890 [00:46<00:56,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 407/890 [00:47<00:56,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 408/890 [00:47<00:55,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 409/890 [00:47<00:55,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 410/890 [00:47<00:55,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 411/890 [00:47<00:55,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 412/890 [00:47<00:55,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 413/890 [00:47<00:55,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 414/890 [00:47<00:54,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 415/890 [00:47<00:55,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 416/890 [00:48<00:55,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 417/890 [00:48<00:55,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 418/890 [00:48<00:54,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 419/890 [00:48<00:54,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 420/890 [00:48<00:54,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 421/890 [00:48<00:54,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 422/890 [00:48<00:54,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 423/890 [00:48<00:53,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 424/890 [00:49<00:53,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 425/890 [00:49<00:53,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 426/890 [00:49<00:53,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 427/890 [00:49<00:52,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 428/890 [00:49<00:52,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 429/890 [00:49<00:53,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 430/890 [00:49<00:54,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 431/890 [00:49<00:53,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 432/890 [00:49<00:53,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 433/890 [00:50<00:52,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 434/890 [00:50<00:52,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 435/890 [00:50<00:52,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 436/890 [00:50<00:53,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 437/890 [00:50<00:52,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 438/890 [00:50<00:51,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 439/890 [00:50<00:51,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 440/890 [00:50<00:52,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 441/890 [00:50<00:51,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 442/890 [00:51<00:51,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 443/890 [00:51<00:51,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 444/890 [00:51<00:51,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 445/890 [00:51<00:51,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 446/890 [00:51<00:51,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 447/890 [00:51<00:51,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 448/890 [00:51<00:50,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 449/890 [00:51<00:51,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 450/890 [00:52<00:51,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 451/890 [00:52<00:50,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 452/890 [00:52<00:50,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 453/890 [00:52<00:50,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 454/890 [00:52<00:50,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 455/890 [00:52<00:50,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 456/890 [00:52<00:50,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 457/890 [00:52<00:49,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 458/890 [00:52<00:49,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 459/890 [00:53<00:49,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 460/890 [00:53<00:49,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 461/890 [00:53<00:49,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 462/890 [00:53<00:49,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 463/890 [00:53<00:49,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 464/890 [00:53<00:49,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 465/890 [00:53<00:49,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 466/890 [00:53<00:48,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 467/890 [00:53<00:48,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 468/890 [00:54<00:48,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 469/890 [00:54<00:49,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 470/890 [00:54<00:48,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 471/890 [00:54<00:48,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 472/890 [00:54<00:47,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 473/890 [00:54<00:48,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 474/890 [00:54<00:47,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 475/890 [00:54<00:47,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 476/890 [00:55<00:47,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 477/890 [00:55<00:47,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 478/890 [00:55<00:47,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 479/890 [00:55<00:47,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 480/890 [00:55<00:47,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 481/890 [00:55<00:47,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 482/890 [00:55<00:47,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 483/890 [00:55<00:46,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 484/890 [00:55<00:46,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 485/890 [00:56<00:46,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 486/890 [00:56<00:46,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 487/890 [00:56<00:47,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 488/890 [00:56<00:46,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 489/890 [00:56<00:46,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 490/890 [00:56<00:46,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 491/890 [00:56<00:46,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 492/890 [00:56<00:46,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 493/890 [00:56<00:46,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 494/890 [00:57<00:46,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 495/890 [00:57<00:45,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 496/890 [00:57<00:45,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 497/890 [00:57<00:45,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 498/890 [00:57<00:45,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 499/890 [00:57<00:46,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 500/890 [00:57<00:45,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 501/890 [00:57<00:44,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 502/890 [00:58<00:45,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 503/890 [00:58<00:45,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 504/890 [00:58<00:44,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 505/890 [00:58<00:44,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 506/890 [00:58<00:44,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 507/890 [00:58<00:44,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 508/890 [00:58<00:44,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 509/890 [00:58<00:43,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 510/890 [00:58<00:43,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 511/890 [00:59<00:44,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 512/890 [00:59<00:43,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 513/890 [00:59<00:43,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 514/890 [00:59<00:43,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 515/890 [00:59<00:43,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 516/890 [00:59<00:42,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 517/890 [00:59<00:42,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 518/890 [00:59<00:42,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 519/890 [00:59<00:42,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 520/890 [01:00<00:42,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 521/890 [01:00<00:42,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 522/890 [01:00<00:42,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 523/890 [01:00<00:42,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 524/890 [01:00<00:42,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 525/890 [01:00<00:42,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 526/890 [01:00<00:41,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 527/890 [01:00<00:41,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 528/890 [01:01<00:41,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 529/890 [01:01<00:41,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 530/890 [01:01<00:40,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 531/890 [01:01<00:41,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 532/890 [01:01<00:41,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 533/890 [01:01<00:41,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 534/890 [01:01<00:41,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 535/890 [01:01<00:40,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 536/890 [01:01<00:40,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 537/890 [01:02<00:41,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 538/890 [01:02<00:40,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 539/890 [01:02<00:40,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 540/890 [01:02<00:40,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 541/890 [01:02<00:40,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 542/890 [01:02<00:40,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 543/890 [01:02<00:40,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 544/890 [01:02<00:40,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 545/890 [01:02<00:39,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 546/890 [01:03<00:39,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 547/890 [01:03<00:39,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 548/890 [01:03<00:39,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 549/890 [01:03<00:39,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 550/890 [01:03<00:39,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 551/890 [01:03<00:39,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 552/890 [01:03<00:39,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 553/890 [01:03<00:39,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 554/890 [01:04<00:38,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 555/890 [01:04<00:38,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 556/890 [01:04<00:38,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 557/890 [01:04<00:38,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 558/890 [01:04<00:38,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 559/890 [01:04<00:38,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 560/890 [01:04<00:38,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 561/890 [01:04<00:38,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 562/890 [01:04<00:38,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 563/890 [01:05<00:37,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 564/890 [01:05<00:37,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 565/890 [01:05<00:37,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 566/890 [01:05<00:37,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 567/890 [01:05<00:37,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 568/890 [01:05<00:37,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 569/890 [01:05<00:36,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 570/890 [01:05<00:36,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 571/890 [01:06<00:36,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 572/890 [01:06<00:36,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 573/890 [01:06<00:36,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 574/890 [01:06<00:36,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 575/890 [01:06<00:37,  8.30it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 576/890 [01:06<00:36,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 577/890 [01:06<00:36,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 578/890 [01:06<00:35,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 579/890 [01:06<00:35,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 580/890 [01:07<00:35,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 581/890 [01:07<00:35,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 582/890 [01:07<00:35,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 583/890 [01:07<00:35,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 584/890 [01:07<00:35,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 585/890 [01:07<00:34,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 586/890 [01:07<00:34,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 587/890 [01:07<00:34,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 588/890 [01:07<00:34,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 589/890 [01:08<00:34,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 590/890 [01:08<00:34,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 591/890 [01:08<00:34,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 592/890 [01:08<00:34,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 593/890 [01:08<00:34,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 594/890 [01:08<00:33,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 595/890 [01:08<00:33,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 596/890 [01:08<00:33,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 597/890 [01:08<00:33,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 598/890 [01:09<00:33,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 599/890 [01:09<00:33,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 600/890 [01:09<00:33,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 601/890 [01:09<00:32,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 602/890 [01:09<00:32,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 603/890 [01:09<00:33,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 604/890 [01:09<00:32,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 605/890 [01:09<00:32,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 606/890 [01:10<00:32,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 607/890 [01:10<00:32,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 608/890 [01:10<00:32,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 609/890 [01:10<00:32,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 610/890 [01:10<00:32,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 611/890 [01:10<00:32,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 612/890 [01:10<00:32,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 613/890 [01:10<00:32,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 614/890 [01:10<00:31,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 615/890 [01:11<00:31,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 616/890 [01:11<00:31,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 617/890 [01:11<00:31,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 618/890 [01:11<00:31,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 619/890 [01:11<00:31,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 620/890 [01:11<00:30,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 621/890 [01:11<00:30,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 622/890 [01:11<00:30,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 623/890 [01:11<00:30,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 624/890 [01:12<00:30,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 625/890 [01:12<00:30,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 626/890 [01:12<00:30,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 627/890 [01:12<00:30,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 628/890 [01:12<00:30,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 629/890 [01:12<00:30,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 630/890 [01:12<00:30,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 631/890 [01:12<00:30,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 632/890 [01:13<00:29,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 633/890 [01:13<00:29,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 634/890 [01:13<00:29,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 635/890 [01:13<00:29,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 636/890 [01:13<00:29,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 637/890 [01:13<00:29,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 638/890 [01:13<00:29,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 639/890 [01:13<00:29,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 640/890 [01:13<00:29,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 641/890 [01:14<00:28,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 642/890 [01:14<00:28,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 643/890 [01:14<00:28,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 644/890 [01:14<00:28,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 645/890 [01:14<00:27,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 646/890 [01:14<00:27,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 647/890 [01:14<00:28,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 648/890 [01:14<00:28,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 649/890 [01:15<00:27,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 650/890 [01:15<00:27,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 651/890 [01:15<00:27,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 652/890 [01:15<00:27,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 653/890 [01:15<00:27,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 654/890 [01:15<00:27,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 655/890 [01:15<00:26,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 656/890 [01:15<00:26,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 657/890 [01:15<00:26,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 658/890 [01:16<00:26,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 659/890 [01:16<00:26,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 660/890 [01:16<00:26,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 661/890 [01:16<00:26,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 662/890 [01:16<00:26,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 663/890 [01:16<00:26,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 664/890 [01:16<00:26,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 665/890 [01:16<00:25,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 666/890 [01:16<00:25,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 667/890 [01:17<00:25,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 668/890 [01:17<00:25,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 669/890 [01:17<00:25,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 670/890 [01:17<00:25,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 671/890 [01:17<00:24,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 672/890 [01:17<00:24,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 673/890 [01:17<00:24,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 674/890 [01:17<00:25,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 675/890 [01:18<00:24,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 676/890 [01:18<00:24,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 677/890 [01:18<00:24,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 678/890 [01:18<00:24,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 679/890 [01:18<00:23,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 680/890 [01:18<00:23,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 681/890 [01:18<00:23,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 682/890 [01:18<00:23,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 683/890 [01:18<00:23,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 684/890 [01:19<00:23,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 685/890 [01:19<00:23,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 686/890 [01:19<00:23,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 687/890 [01:19<00:23,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 688/890 [01:19<00:23,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 689/890 [01:19<00:22,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 690/890 [01:19<00:22,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 691/890 [01:19<00:22,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 692/890 [01:19<00:22,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 693/890 [01:20<00:22,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 694/890 [01:20<00:22,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 695/890 [01:20<00:22,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 696/890 [01:20<00:22,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 697/890 [01:20<00:22,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 698/890 [01:20<00:22,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 699/890 [01:20<00:21,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 700/890 [01:20<00:21,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 701/890 [01:20<00:21,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 702/890 [01:21<00:21,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 703/890 [01:21<00:21,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 704/890 [01:21<00:21,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 705/890 [01:21<00:21,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 706/890 [01:21<00:21,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 707/890 [01:21<00:20,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 708/890 [01:21<00:20,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 709/890 [01:21<00:21,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 710/890 [01:22<00:20,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 711/890 [01:22<00:20,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 712/890 [01:22<00:20,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 713/890 [01:22<00:20,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 714/890 [01:22<00:20,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 715/890 [01:22<00:19,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 716/890 [01:22<00:20,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 717/890 [01:22<00:19,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 718/890 [01:22<00:19,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 719/890 [01:23<00:19,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 720/890 [01:23<00:19,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 721/890 [01:23<00:19,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 722/890 [01:23<00:19,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 723/890 [01:23<00:19,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 724/890 [01:23<00:19,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 725/890 [01:23<00:18,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 726/890 [01:23<00:18,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 727/890 [01:23<00:18,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 728/890 [01:24<00:18,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 729/890 [01:24<00:18,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 730/890 [01:24<00:18,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 731/890 [01:24<00:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 732/890 [01:24<00:18,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 733/890 [01:24<00:18,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 734/890 [01:24<00:17,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 735/890 [01:24<00:17,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 736/890 [01:25<00:17,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 737/890 [01:25<00:17,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 738/890 [01:25<00:17,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 739/890 [01:25<00:17,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 740/890 [01:25<00:17,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 741/890 [01:25<00:17,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 742/890 [01:25<00:17,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 743/890 [01:25<00:17,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 744/890 [01:25<00:16,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 745/890 [01:26<00:16,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 746/890 [01:26<00:16,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 747/890 [01:26<00:16,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 748/890 [01:26<00:16,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 749/890 [01:26<00:16,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 750/890 [01:26<00:16,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 751/890 [01:26<00:16,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 752/890 [01:26<00:16,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 753/890 [01:26<00:15,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 754/890 [01:27<00:15,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 755/890 [01:27<00:15,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 756/890 [01:27<00:15,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 757/890 [01:27<00:15,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 758/890 [01:27<00:15,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 759/890 [01:27<00:14,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 760/890 [01:27<00:15,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 761/890 [01:27<00:15,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 762/890 [01:28<00:14,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 763/890 [01:28<00:14,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 764/890 [01:28<00:14,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 765/890 [01:28<00:14,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 766/890 [01:28<00:14,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 767/890 [01:28<00:14,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 768/890 [01:28<00:14,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 769/890 [01:28<00:14,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 770/890 [01:28<00:14,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 771/890 [01:29<00:13,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 772/890 [01:29<00:13,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 773/890 [01:29<00:13,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 774/890 [01:29<00:13,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 775/890 [01:29<00:13,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 776/890 [01:29<00:13,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 777/890 [01:29<00:12,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 778/890 [01:29<00:12,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 779/890 [01:29<00:12,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 780/890 [01:30<00:12,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 781/890 [01:30<00:12,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 782/890 [01:30<00:12,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 783/890 [01:30<00:12,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 784/890 [01:30<00:12,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 785/890 [01:30<00:12,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 786/890 [01:30<00:11,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 787/890 [01:30<00:11,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 788/890 [01:31<00:11,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 789/890 [01:31<00:11,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 790/890 [01:31<00:11,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 791/890 [01:31<00:11,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 792/890 [01:31<00:11,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 793/890 [01:31<00:11,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 794/890 [01:31<00:11,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 795/890 [01:31<00:10,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 796/890 [01:31<00:10,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 797/890 [01:32<00:10,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 798/890 [01:32<00:10,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 799/890 [01:32<00:10,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 800/890 [01:32<00:10,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 801/890 [01:32<00:10,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 802/890 [01:32<00:10,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 803/890 [01:32<00:10,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 804/890 [01:32<00:09,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 805/890 [01:32<00:09,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 806/890 [01:33<00:09,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 807/890 [01:33<00:09,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 808/890 [01:33<00:09,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 809/890 [01:33<00:09,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 810/890 [01:33<00:09,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 811/890 [01:33<00:09,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 812/890 [01:33<00:09,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 813/890 [01:33<00:08,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 814/890 [01:34<00:08,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 815/890 [01:34<00:08,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 816/890 [01:34<00:08,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 817/890 [01:34<00:08,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 818/890 [01:34<00:08,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 819/890 [01:34<00:08,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 820/890 [01:34<00:08,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 821/890 [01:34<00:08,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 822/890 [01:34<00:07,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 823/890 [01:35<00:07,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 824/890 [01:35<00:07,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 825/890 [01:35<00:07,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 826/890 [01:35<00:07,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 827/890 [01:35<00:07,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 828/890 [01:35<00:07,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 829/890 [01:35<00:07,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 830/890 [01:35<00:06,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 831/890 [01:35<00:06,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 832/890 [01:36<00:06,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 833/890 [01:36<00:06,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 834/890 [01:36<00:06,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 835/890 [01:36<00:06,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 836/890 [01:36<00:06,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 837/890 [01:36<00:06,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 838/890 [01:36<00:05,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 839/890 [01:36<00:05,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 840/890 [01:37<00:05,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 841/890 [01:37<00:05,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 842/890 [01:37<00:05,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 843/890 [01:37<00:05,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 844/890 [01:37<00:05,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 845/890 [01:37<00:05,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 846/890 [01:37<00:05,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 847/890 [01:37<00:04,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 848/890 [01:37<00:04,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 849/890 [01:38<00:04,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 850/890 [01:38<00:04,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 851/890 [01:38<00:04,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 852/890 [01:38<00:04,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 853/890 [01:38<00:04,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 854/890 [01:38<00:04,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 855/890 [01:38<00:04,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 856/890 [01:38<00:03,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 857/890 [01:38<00:03,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 858/890 [01:39<00:03,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 859/890 [01:39<00:03,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 860/890 [01:39<00:03,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 861/890 [01:39<00:03,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 862/890 [01:39<00:03,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 863/890 [01:39<00:03,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 864/890 [01:39<00:03,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 865/890 [01:39<00:02,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 866/890 [01:40<00:02,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 867/890 [01:40<00:02,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 868/890 [01:40<00:02,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 869/890 [01:40<00:02,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 870/890 [01:40<00:02,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 871/890 [01:40<00:02,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 872/890 [01:40<00:02,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 873/890 [01:40<00:01,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 874/890 [01:40<00:01,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 875/890 [01:41<00:01,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 876/890 [01:41<00:01,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 877/890 [01:41<00:01,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 878/890 [01:41<00:01,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 879/890 [01:41<00:01,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 880/890 [01:41<00:01,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 881/890 [01:41<00:01,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 882/890 [01:41<00:00,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 883/890 [01:41<00:00,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 884/890 [01:42<00:00,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 885/890 [01:42<00:00,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 886/890 [01:42<00:00,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 887/890 [01:42<00:00,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 888/890 [01:42<00:00,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 889/890 [01:42<00:00,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|██████████| 890/890 [01:42<00:00,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])





input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torc

  0%|          | 0/890 [00:00<?, ?it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 2/890 [00:00<01:16, 11.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 4/890 [00:00<01:30,  9.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 5/890 [00:00<01:34,  9.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 6/890 [00:00<01:38,  8.97it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 7/890 [00:00<01:38,  8.92it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 8/890 [00:00<01:37,  9.05it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 9/890 [00:00<01:38,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 10/890 [00:01<01:41,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 11/890 [00:01<01:42,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 12/890 [00:01<01:42,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 13/890 [00:01<01:41,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 14/890 [00:01<01:40,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 15/890 [00:01<01:41,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 16/890 [00:01<01:41,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 17/890 [00:01<01:41,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 18/890 [00:02<01:40,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 19/890 [00:02<01:40,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 20/890 [00:02<01:41,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 21/890 [00:02<01:41,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 22/890 [00:02<01:41,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 23/890 [00:02<01:40,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 24/890 [00:02<01:39,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 25/890 [00:02<01:39,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 26/890 [00:02<01:40,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 27/890 [00:03<01:40,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 28/890 [00:03<01:38,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 29/890 [00:03<01:38,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 30/890 [00:03<01:39,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 31/890 [00:03<01:38,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 32/890 [00:03<01:37,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 33/890 [00:03<01:38,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 34/890 [00:03<01:39,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 35/890 [00:03<01:39,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 36/890 [00:04<01:38,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 37/890 [00:04<01:38,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 38/890 [00:04<01:39,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 39/890 [00:04<01:39,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 40/890 [00:04<01:39,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 41/890 [00:04<01:39,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 42/890 [00:04<01:37,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 43/890 [00:04<01:37,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 44/890 [00:05<01:37,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 45/890 [00:05<01:37,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 46/890 [00:05<01:36,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 47/890 [00:05<01:37,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 48/890 [00:05<01:37,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 49/890 [00:05<01:38,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 50/890 [00:05<01:38,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 51/890 [00:05<01:37,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 52/890 [00:05<01:36,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 53/890 [00:06<01:37,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 54/890 [00:06<01:37,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 55/890 [00:06<01:37,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 56/890 [00:06<01:36,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 57/890 [00:06<01:35,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 58/890 [00:06<01:36,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 59/890 [00:06<01:36,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 60/890 [00:06<01:36,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 61/890 [00:06<01:35,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 62/890 [00:07<01:35,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 63/890 [00:07<01:35,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 64/890 [00:07<01:36,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 65/890 [00:07<01:34,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 66/890 [00:07<01:35,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 67/890 [00:07<01:34,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 68/890 [00:07<01:34,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 69/890 [00:07<01:34,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 70/890 [00:08<01:33,  8.80it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 71/890 [00:08<01:34,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 72/890 [00:08<01:34,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 73/890 [00:08<01:35,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 74/890 [00:08<01:33,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 75/890 [00:08<01:34,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 76/890 [00:08<01:35,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 77/890 [00:08<01:34,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 78/890 [00:08<01:34,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 79/890 [00:09<01:34,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 80/890 [00:09<01:33,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 81/890 [00:09<01:33,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 82/890 [00:09<01:33,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 83/890 [00:09<01:33,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 84/890 [00:09<01:31,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 85/890 [00:09<01:32,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 86/890 [00:09<01:33,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 87/890 [00:10<01:33,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 88/890 [00:10<01:32,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 89/890 [00:10<01:31,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 90/890 [00:10<01:32,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 91/890 [00:10<01:32,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 92/890 [00:10<01:31,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 93/890 [00:10<01:31,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 94/890 [00:10<01:31,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 95/890 [00:10<01:31,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 96/890 [00:11<01:31,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 97/890 [00:11<01:30,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 98/890 [00:11<01:30,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 99/890 [00:11<01:31,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 100/890 [00:11<01:30,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 101/890 [00:11<01:30,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 102/890 [00:11<01:30,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 103/890 [00:11<01:30,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 104/890 [00:11<01:29,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 105/890 [00:12<01:29,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 106/890 [00:12<01:30,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 107/890 [00:12<01:30,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 108/890 [00:12<01:29,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 109/890 [00:12<01:29,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 110/890 [00:12<01:30,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 111/890 [00:12<01:30,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 112/890 [00:12<01:29,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 113/890 [00:12<01:28,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 114/890 [00:13<01:28,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 115/890 [00:13<01:28,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 116/890 [00:13<01:29,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 117/890 [00:13<01:28,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 118/890 [00:13<01:29,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 119/890 [00:13<01:28,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 120/890 [00:13<01:27,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 121/890 [00:13<01:28,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 122/890 [00:14<01:29,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 123/890 [00:14<01:29,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 124/890 [00:14<01:27,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 125/890 [00:14<01:27,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 126/890 [00:14<01:28,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 127/890 [00:14<01:28,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 128/890 [00:14<01:27,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 129/890 [00:14<01:27,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 130/890 [00:14<01:27,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 131/890 [00:15<01:28,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 132/890 [00:15<01:28,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 133/890 [00:15<01:27,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 134/890 [00:15<01:26,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 135/890 [00:15<01:27,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 136/890 [00:15<01:27,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 137/890 [00:15<01:27,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 138/890 [00:15<01:26,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 139/890 [00:15<01:26,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 140/890 [00:16<01:27,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 141/890 [00:16<01:27,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 142/890 [00:16<01:27,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 143/890 [00:16<01:26,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 144/890 [00:16<01:25,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 145/890 [00:16<01:26,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 146/890 [00:16<01:26,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 147/890 [00:16<01:25,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 148/890 [00:17<01:24,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 149/890 [00:17<01:25,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 150/890 [00:17<01:26,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 151/890 [00:17<01:26,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 152/890 [00:17<01:27,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 153/890 [00:17<01:24,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 154/890 [00:17<01:24,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 155/890 [00:17<01:25,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 156/890 [00:17<01:25,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 157/890 [00:18<01:24,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 158/890 [00:18<01:24,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 159/890 [00:18<01:24,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 160/890 [00:18<01:24,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 161/890 [00:18<01:24,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 162/890 [00:18<01:23,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 163/890 [00:18<01:23,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 164/890 [00:18<01:23,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 165/890 [00:19<01:24,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 166/890 [00:19<01:23,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 167/890 [00:19<01:23,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 168/890 [00:19<01:23,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 169/890 [00:19<01:23,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 170/890 [00:19<01:23,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 171/890 [00:19<01:21,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 172/890 [00:19<01:22,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 173/890 [00:19<01:23,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 174/890 [00:20<01:22,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 175/890 [00:20<01:21,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 176/890 [00:20<01:22,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 177/890 [00:20<01:23,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 178/890 [00:20<01:23,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 179/890 [00:20<01:22,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 180/890 [00:20<01:21,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 181/890 [00:20<01:21,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 182/890 [00:20<01:21,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 183/890 [00:21<01:20,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 184/890 [00:21<01:20,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 185/890 [00:21<01:21,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 186/890 [00:21<01:27,  8.09it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 187/890 [00:21<01:24,  8.33it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 188/890 [00:21<01:22,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 189/890 [00:21<01:21,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 190/890 [00:21<01:21,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 191/890 [00:22<01:21,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 192/890 [00:22<01:20,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 193/890 [00:22<01:20,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 194/890 [00:22<01:20,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 195/890 [00:22<01:20,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 196/890 [00:22<01:19,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 197/890 [00:22<01:19,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 198/890 [00:22<01:19,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 199/890 [00:22<01:20,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 200/890 [00:23<01:19,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 201/890 [00:23<01:18,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 202/890 [00:23<01:19,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 203/890 [00:23<01:20,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 204/890 [00:23<01:20,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 205/890 [00:23<01:19,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 206/890 [00:23<01:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 207/890 [00:23<01:18,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 208/890 [00:23<01:19,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 209/890 [00:24<01:19,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 210/890 [00:24<01:18,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 211/890 [00:24<01:18,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 212/890 [00:24<01:18,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 213/890 [00:24<01:18,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 214/890 [00:24<01:17,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 215/890 [00:24<01:17,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 216/890 [00:24<01:17,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 217/890 [00:25<01:17,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 218/890 [00:25<01:16,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 219/890 [00:25<01:16,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 220/890 [00:25<01:17,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 221/890 [00:25<01:17,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 222/890 [00:25<01:17,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 223/890 [00:25<01:16,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 224/890 [00:25<01:16,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 225/890 [00:25<01:17,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 226/890 [00:26<01:16,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 227/890 [00:26<01:15,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 228/890 [00:26<01:16,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 229/890 [00:26<01:16,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 230/890 [00:26<01:16,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 231/890 [00:26<01:15,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 232/890 [00:26<01:15,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 233/890 [00:26<01:15,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 234/890 [00:26<01:16,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 235/890 [00:27<01:16,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 236/890 [00:27<01:16,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 237/890 [00:27<01:14,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 238/890 [00:27<01:15,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 239/890 [00:27<01:15,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 240/890 [00:27<01:16,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 241/890 [00:27<01:15,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 242/890 [00:27<01:14,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 243/890 [00:28<01:14,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 244/890 [00:28<01:14,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 245/890 [00:28<01:14,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 246/890 [00:28<01:14,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 247/890 [00:28<01:14,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 248/890 [00:28<01:14,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 249/890 [00:28<01:15,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 250/890 [00:28<01:14,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 251/890 [00:28<01:13,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 252/890 [00:29<01:13,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 253/890 [00:29<01:13,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 254/890 [00:29<01:13,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 255/890 [00:29<01:13,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 256/890 [00:29<01:12,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 257/890 [00:29<01:13,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 258/890 [00:29<01:13,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 259/890 [00:29<01:13,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 260/890 [00:29<01:13,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 261/890 [00:30<01:12,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 262/890 [00:30<01:12,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 263/890 [00:30<01:12,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 264/890 [00:30<01:12,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 265/890 [00:30<01:11,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 266/890 [00:30<01:12,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 267/890 [00:30<01:12,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 268/890 [00:30<01:12,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 269/890 [00:31<01:11,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 270/890 [00:31<01:11,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 271/890 [00:31<01:11,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 272/890 [00:31<01:11,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 273/890 [00:31<01:11,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 274/890 [00:31<01:11,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 275/890 [00:31<01:11,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 276/890 [00:31<01:11,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 277/890 [00:31<01:11,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 278/890 [00:32<01:11,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 279/890 [00:32<01:10,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 280/890 [00:32<01:10,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 281/890 [00:32<01:10,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 282/890 [00:32<01:10,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 283/890 [00:32<01:10,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 284/890 [00:32<01:09,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 285/890 [00:32<01:09,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 286/890 [00:33<01:10,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 287/890 [00:33<01:10,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 288/890 [00:33<01:09,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 289/890 [00:33<01:08,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 290/890 [00:33<01:09,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 291/890 [00:33<01:09,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 292/890 [00:33<01:08,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 293/890 [00:33<01:08,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 294/890 [00:33<01:08,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 295/890 [00:34<01:09,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 296/890 [00:34<01:08,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 297/890 [00:34<01:08,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 298/890 [00:34<01:08,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 299/890 [00:34<01:08,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 300/890 [00:34<01:08,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 301/890 [00:34<01:08,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 302/890 [00:34<01:07,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 303/890 [00:34<01:07,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 304/890 [00:35<01:08,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 305/890 [00:35<01:08,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 306/890 [00:35<01:07,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 307/890 [00:35<01:06,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 308/890 [00:35<01:07,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 309/890 [00:35<01:07,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 310/890 [00:35<01:07,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 311/890 [00:35<01:06,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 312/890 [00:36<01:06,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 313/890 [00:36<01:06,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 314/890 [00:36<01:07,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 315/890 [00:36<01:07,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 316/890 [00:36<01:05,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 317/890 [00:36<01:05,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 318/890 [00:36<01:05,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 319/890 [00:36<01:04,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 320/890 [00:36<01:05,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 321/890 [00:37<01:05,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 322/890 [00:37<01:06,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 323/890 [00:37<01:05,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 324/890 [00:37<01:04,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 325/890 [00:37<01:04,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 326/890 [00:37<01:05,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 327/890 [00:37<01:05,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 328/890 [00:37<01:04,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 329/890 [00:37<01:04,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 330/890 [00:38<01:04,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 331/890 [00:38<01:05,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 332/890 [00:38<01:04,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 333/890 [00:38<01:04,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 334/890 [00:38<01:03,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 335/890 [00:38<01:04,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 336/890 [00:38<01:04,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 337/890 [00:38<01:03,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 338/890 [00:39<01:03,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 339/890 [00:39<01:03,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 340/890 [00:39<01:04,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 341/890 [00:39<01:03,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 342/890 [00:39<01:02,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 343/890 [00:39<01:02,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 344/890 [00:39<01:03,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 345/890 [00:39<01:03,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 346/890 [00:39<01:02,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 347/890 [00:40<01:02,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 348/890 [00:40<01:03,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 349/890 [00:40<01:03,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 350/890 [00:40<01:03,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 351/890 [00:40<01:02,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 352/890 [00:40<01:03,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 353/890 [00:40<01:01,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 354/890 [00:40<01:01,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 355/890 [00:40<01:02,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 356/890 [00:41<01:01,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 357/890 [00:41<01:00,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 358/890 [00:41<01:01,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 359/890 [00:41<01:01,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 360/890 [00:41<01:01,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 361/890 [00:41<01:00,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 362/890 [00:41<01:00,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 363/890 [00:41<01:00,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 364/890 [00:42<01:00,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 365/890 [00:42<01:00,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 366/890 [00:42<01:00,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 367/890 [00:42<01:00,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 368/890 [00:42<01:00,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 369/890 [00:42<01:00,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 370/890 [00:42<01:00,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 371/890 [00:42<00:59,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 372/890 [00:42<00:59,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 373/890 [00:43<01:00,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 374/890 [00:43<01:00,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 375/890 [00:43<00:59,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 376/890 [00:43<01:00,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 377/890 [00:43<00:59,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 378/890 [00:43<00:59,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 379/890 [00:43<00:59,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 380/890 [00:43<00:58,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 381/890 [00:43<00:58,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 382/890 [00:44<00:58,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 383/890 [00:44<00:59,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 384/890 [00:44<00:58,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 385/890 [00:44<00:57,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 386/890 [00:44<00:58,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 387/890 [00:44<00:58,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 388/890 [00:44<00:58,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 389/890 [00:44<00:58,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 390/890 [00:45<00:57,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 391/890 [00:45<00:57,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 392/890 [00:45<00:58,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 393/890 [00:45<00:57,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 394/890 [00:45<00:57,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 395/890 [00:45<00:56,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 396/890 [00:45<00:57,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 397/890 [00:45<00:57,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 398/890 [00:45<00:56,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 399/890 [00:46<00:56,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 400/890 [00:46<00:56,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 401/890 [00:46<00:56,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 402/890 [00:46<00:55,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 403/890 [00:46<00:55,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 404/890 [00:46<00:56,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 405/890 [00:46<00:56,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 406/890 [00:46<00:56,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 407/890 [00:46<00:55,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 408/890 [00:47<00:55,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 409/890 [00:47<00:55,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 410/890 [00:47<00:55,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 411/890 [00:47<00:55,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 412/890 [00:47<00:54,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 413/890 [00:47<00:55,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 414/890 [00:47<00:55,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 415/890 [00:47<00:55,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 416/890 [00:48<00:55,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 417/890 [00:48<00:54,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 418/890 [00:48<00:54,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 419/890 [00:48<00:54,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 420/890 [00:48<00:54,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 421/890 [00:48<00:54,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 422/890 [00:48<00:53,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 423/890 [00:48<00:53,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 424/890 [00:48<00:53,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 425/890 [00:49<00:53,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 426/890 [00:49<00:52,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 427/890 [00:49<00:53,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 428/890 [00:49<00:53,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 429/890 [00:49<00:53,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 430/890 [00:49<00:54,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 431/890 [00:49<00:53,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 432/890 [00:49<00:52,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 433/890 [00:50<00:52,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 434/890 [00:50<00:52,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 435/890 [00:50<00:52,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 436/890 [00:50<00:51,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 437/890 [00:50<00:52,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 438/890 [00:50<00:52,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 439/890 [00:50<00:52,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 440/890 [00:50<00:52,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 441/890 [00:50<00:52,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 442/890 [00:51<00:51,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 443/890 [00:51<00:51,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 444/890 [00:51<00:51,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 445/890 [00:51<00:51,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 446/890 [00:51<00:50,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 447/890 [00:51<00:50,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 448/890 [00:51<00:51,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 449/890 [00:51<00:50,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 450/890 [00:51<00:50,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 451/890 [00:52<00:50,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 452/890 [00:52<00:50,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 453/890 [00:52<00:50,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 454/890 [00:52<00:51,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 455/890 [00:52<00:49,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 456/890 [00:52<00:49,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 457/890 [00:52<00:50,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 458/890 [00:52<00:50,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 459/890 [00:53<00:49,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 460/890 [00:53<00:49,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 461/890 [00:53<00:49,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 462/890 [00:53<00:50,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 463/890 [00:53<00:50,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 464/890 [00:53<00:49,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 465/890 [00:53<00:48,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 466/890 [00:53<00:49,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 467/890 [00:53<00:49,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 468/890 [00:54<00:49,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 469/890 [00:54<00:49,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 470/890 [00:54<00:48,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 471/890 [00:54<00:48,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 472/890 [00:54<00:48,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 473/890 [00:54<00:48,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 474/890 [00:54<00:48,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 475/890 [00:54<00:47,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 476/890 [00:54<00:47,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 477/890 [00:55<00:47,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 478/890 [00:55<00:48,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 479/890 [00:55<00:48,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 480/890 [00:55<00:47,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 481/890 [00:55<00:46,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 482/890 [00:55<00:47,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 483/890 [00:55<00:47,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 484/890 [00:55<00:47,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 485/890 [00:56<00:47,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 486/890 [00:56<00:46,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 487/890 [00:56<00:46,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 488/890 [00:56<00:46,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 489/890 [00:56<00:46,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 490/890 [00:56<00:46,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 491/890 [00:56<00:46,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 492/890 [00:56<00:45,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 493/890 [00:56<00:45,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 494/890 [00:57<00:45,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 495/890 [00:57<00:45,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 496/890 [00:57<00:45,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 497/890 [00:57<00:45,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 498/890 [00:57<00:45,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 499/890 [00:57<00:45,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 500/890 [00:57<00:44,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 501/890 [00:57<00:44,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 502/890 [00:57<00:44,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 503/890 [00:58<00:45,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 504/890 [00:58<00:44,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 505/890 [00:58<00:44,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 506/890 [00:58<00:44,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 507/890 [00:58<00:44,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 508/890 [00:58<00:44,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 509/890 [00:58<00:43,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 510/890 [00:58<00:43,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 511/890 [00:59<00:43,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 512/890 [00:59<00:44,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 513/890 [00:59<00:43,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 514/890 [00:59<00:43,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 515/890 [00:59<00:43,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 516/890 [00:59<00:43,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 517/890 [00:59<00:44,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 518/890 [00:59<00:42,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 519/890 [00:59<00:42,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 520/890 [01:00<00:42,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 521/890 [01:00<00:42,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 522/890 [01:00<00:42,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 523/890 [01:00<00:41,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 524/890 [01:00<00:41,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 525/890 [01:00<00:42,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 526/890 [01:00<00:42,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 527/890 [01:00<00:42,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 528/890 [01:00<00:41,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 529/890 [01:01<00:41,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 530/890 [01:01<00:41,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 531/890 [01:01<00:41,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 532/890 [01:01<00:40,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 533/890 [01:01<00:40,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 534/890 [01:01<00:41,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 535/890 [01:01<00:40,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 536/890 [01:01<00:40,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 537/890 [01:02<00:40,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 538/890 [01:02<00:40,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 539/890 [01:02<00:40,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 540/890 [01:02<00:40,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 541/890 [01:02<00:40,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 542/890 [01:02<00:40,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 543/890 [01:02<00:40,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 544/890 [01:02<00:40,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 545/890 [01:02<00:39,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 546/890 [01:03<00:39,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 547/890 [01:03<00:39,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 548/890 [01:03<00:39,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 549/890 [01:03<00:39,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 550/890 [01:03<00:39,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 551/890 [01:03<00:39,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 552/890 [01:03<00:39,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 553/890 [01:03<00:39,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 554/890 [01:04<00:38,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 555/890 [01:04<00:38,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 556/890 [01:04<00:38,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 557/890 [01:04<00:38,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 558/890 [01:04<00:38,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 559/890 [01:04<00:38,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 560/890 [01:04<00:38,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 561/890 [01:04<00:38,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 562/890 [01:04<00:38,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 563/890 [01:05<00:38,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 564/890 [01:05<00:38,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 565/890 [01:05<00:37,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 566/890 [01:05<00:37,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 567/890 [01:05<00:37,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 568/890 [01:05<00:37,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 569/890 [01:05<00:37,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 570/890 [01:05<00:36,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 571/890 [01:05<00:36,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 572/890 [01:06<00:36,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 573/890 [01:06<00:36,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 574/890 [01:06<00:36,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 575/890 [01:06<00:36,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 576/890 [01:06<00:36,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 577/890 [01:06<00:36,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 578/890 [01:06<00:36,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 579/890 [01:06<00:35,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 580/890 [01:07<00:35,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 581/890 [01:07<00:35,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 582/890 [01:07<00:35,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 583/890 [01:07<00:35,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 584/890 [01:07<00:35,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 585/890 [01:07<00:34,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 586/890 [01:07<00:35,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 587/890 [01:07<00:35,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 588/890 [01:07<00:34,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 589/890 [01:08<00:34,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 590/890 [01:08<00:34,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 591/890 [01:08<00:34,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 592/890 [01:08<00:34,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 593/890 [01:08<00:33,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 594/890 [01:08<00:34,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 595/890 [01:08<00:34,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 596/890 [01:08<00:33,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 597/890 [01:08<00:33,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 598/890 [01:09<00:33,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 599/890 [01:09<00:33,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 600/890 [01:09<00:33,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 601/890 [01:09<00:32,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 602/890 [01:09<00:33,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 603/890 [01:09<00:33,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 604/890 [01:09<00:33,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 605/890 [01:09<00:32,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 606/890 [01:10<00:32,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 607/890 [01:10<00:32,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 608/890 [01:10<00:33,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 609/890 [01:10<00:32,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 610/890 [01:10<00:32,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 611/890 [01:10<00:31,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 612/890 [01:10<00:32,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 613/890 [01:10<00:32,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 614/890 [01:10<00:31,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 615/890 [01:11<00:31,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 616/890 [01:11<00:31,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 617/890 [01:11<00:31,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 618/890 [01:11<00:31,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 619/890 [01:11<00:31,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 620/890 [01:11<00:31,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 621/890 [01:11<00:31,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 622/890 [01:11<00:30,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 623/890 [01:11<00:30,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 624/890 [01:12<00:30,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 625/890 [01:12<00:30,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 626/890 [01:12<00:30,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 627/890 [01:12<00:30,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 628/890 [01:12<00:30,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 629/890 [01:12<00:30,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 630/890 [01:12<00:29,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 631/890 [01:12<00:29,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 632/890 [01:13<00:29,  8.79it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 633/890 [01:13<00:29,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 634/890 [01:13<00:29,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 635/890 [01:13<00:29,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 636/890 [01:13<00:29,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 637/890 [01:13<00:29,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 638/890 [01:13<00:29,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 639/890 [01:13<00:28,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 640/890 [01:13<00:28,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 641/890 [01:14<00:28,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 642/890 [01:14<00:28,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 643/890 [01:14<00:28,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 644/890 [01:14<00:28,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 645/890 [01:14<00:28,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 646/890 [01:14<00:28,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 647/890 [01:14<00:28,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 648/890 [01:14<00:28,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 649/890 [01:14<00:27,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 650/890 [01:15<00:27,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 651/890 [01:15<00:27,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 652/890 [01:15<00:27,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 653/890 [01:15<00:27,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 654/890 [01:15<00:27,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 655/890 [01:15<00:27,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 656/890 [01:15<00:27,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 657/890 [01:15<00:27,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 658/890 [01:16<00:26,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 659/890 [01:16<00:26,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 660/890 [01:16<00:26,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 661/890 [01:16<00:26,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 662/890 [01:16<00:26,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 663/890 [01:16<00:26,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 664/890 [01:16<00:26,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 665/890 [01:16<00:26,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 666/890 [01:16<00:25,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 667/890 [01:17<00:25,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 668/890 [01:17<00:25,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 669/890 [01:17<00:25,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 670/890 [01:17<00:25,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 671/890 [01:17<00:25,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 672/890 [01:17<00:25,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 673/890 [01:17<00:24,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 674/890 [01:17<00:25,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 675/890 [01:17<00:24,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 676/890 [01:18<00:24,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 677/890 [01:18<00:24,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 678/890 [01:18<00:24,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 679/890 [01:18<00:23,  8.81it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 680/890 [01:18<00:24,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 681/890 [01:18<00:24,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 682/890 [01:18<00:24,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 683/890 [01:18<00:24,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 684/890 [01:19<00:23,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 685/890 [01:19<00:23,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 686/890 [01:19<00:23,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 687/890 [01:19<00:23,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 688/890 [01:19<00:23,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 689/890 [01:19<00:23,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 690/890 [01:19<00:23,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 691/890 [01:19<00:23,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 692/890 [01:19<00:23,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 693/890 [01:20<00:22,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 694/890 [01:20<00:22,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 695/890 [01:20<00:22,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 696/890 [01:20<00:22,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 697/890 [01:20<00:22,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 698/890 [01:20<00:21,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 699/890 [01:20<00:21,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 700/890 [01:20<00:22,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 701/890 [01:20<00:22,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 702/890 [01:21<00:21,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 703/890 [01:21<00:21,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 704/890 [01:21<00:21,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 705/890 [01:21<00:21,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 706/890 [01:21<00:21,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 707/890 [01:21<00:21,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 708/890 [01:21<00:20,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 709/890 [01:21<00:20,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 710/890 [01:22<00:20,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 711/890 [01:22<00:20,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 712/890 [01:22<00:20,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 713/890 [01:22<00:20,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 714/890 [01:22<00:20,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 715/890 [01:22<00:20,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 716/890 [01:22<00:19,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 717/890 [01:22<00:21,  7.90it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 718/890 [01:22<00:20,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 719/890 [01:23<00:20,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 720/890 [01:23<00:20,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 721/890 [01:23<00:19,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 722/890 [01:23<00:19,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 723/890 [01:23<00:19,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 724/890 [01:23<00:19,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 725/890 [01:23<00:19,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 726/890 [01:23<00:18,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 727/890 [01:24<00:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 728/890 [01:24<00:18,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 729/890 [01:24<00:18,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 730/890 [01:24<00:18,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 731/890 [01:24<00:18,  8.77it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 732/890 [01:24<00:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 733/890 [01:24<00:18,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 734/890 [01:24<00:18,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 735/890 [01:24<00:17,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 736/890 [01:25<00:17,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 737/890 [01:25<00:17,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 738/890 [01:25<00:17,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 739/890 [01:25<00:17,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 740/890 [01:25<00:17,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 741/890 [01:25<00:17,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 742/890 [01:25<00:17,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 743/890 [01:25<00:16,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 744/890 [01:25<00:16,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 745/890 [01:26<00:16,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 746/890 [01:26<00:16,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 747/890 [01:26<00:16,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 748/890 [01:26<00:16,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 749/890 [01:26<00:16,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 750/890 [01:26<00:16,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 751/890 [01:26<00:16,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 752/890 [01:26<00:15,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 753/890 [01:27<00:15,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 754/890 [01:27<00:15,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 755/890 [01:27<00:15,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 756/890 [01:27<00:15,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 757/890 [01:27<00:15,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 758/890 [01:27<00:15,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 759/890 [01:27<00:15,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 760/890 [01:27<00:15,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 761/890 [01:27<00:15,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 762/890 [01:28<00:14,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 763/890 [01:28<00:14,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 764/890 [01:28<00:14,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 765/890 [01:28<00:14,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 766/890 [01:28<00:14,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 767/890 [01:28<00:14,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 768/890 [01:28<00:13,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 769/890 [01:28<00:13,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 770/890 [01:28<00:13,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 771/890 [01:29<00:13,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 772/890 [01:29<00:13,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 773/890 [01:29<00:13,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 774/890 [01:29<00:13,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 775/890 [01:29<00:13,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 776/890 [01:29<00:13,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 777/890 [01:29<00:12,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 778/890 [01:29<00:12,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 779/890 [01:30<00:12,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 780/890 [01:30<00:12,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 781/890 [01:30<00:12,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 782/890 [01:30<00:12,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 783/890 [01:30<00:12,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 784/890 [01:30<00:12,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 785/890 [01:30<00:12,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 786/890 [01:30<00:11,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 787/890 [01:30<00:11,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 788/890 [01:31<00:11,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 789/890 [01:31<00:11,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 790/890 [01:31<00:11,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 791/890 [01:31<00:11,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 792/890 [01:31<00:11,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 793/890 [01:31<00:11,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 794/890 [01:31<00:11,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 795/890 [01:31<00:10,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 796/890 [01:31<00:10,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 797/890 [01:32<00:10,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 798/890 [01:32<00:10,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 799/890 [01:32<00:10,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 800/890 [01:32<00:10,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 801/890 [01:32<00:10,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 802/890 [01:32<00:10,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 803/890 [01:32<00:10,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 804/890 [01:32<00:09,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 805/890 [01:33<00:09,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 806/890 [01:33<00:09,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 807/890 [01:33<00:09,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 808/890 [01:33<00:09,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 809/890 [01:33<00:09,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 810/890 [01:33<00:09,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 811/890 [01:33<00:09,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 812/890 [01:33<00:09,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 813/890 [01:33<00:08,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 814/890 [01:34<00:08,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 815/890 [01:34<00:08,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 816/890 [01:34<00:08,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 817/890 [01:34<00:08,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 818/890 [01:34<00:08,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 819/890 [01:34<00:08,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 820/890 [01:34<00:08,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 821/890 [01:34<00:08,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 822/890 [01:35<00:08,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 823/890 [01:35<00:07,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 824/890 [01:35<00:07,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 825/890 [01:35<00:07,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 826/890 [01:35<00:07,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 827/890 [01:35<00:07,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 828/890 [01:35<00:07,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 829/890 [01:35<00:07,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 830/890 [01:35<00:06,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 831/890 [01:36<00:06,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 832/890 [01:36<00:06,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 833/890 [01:36<00:06,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 834/890 [01:36<00:06,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 835/890 [01:36<00:06,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 836/890 [01:36<00:06,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 837/890 [01:36<00:06,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 838/890 [01:36<00:06,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 839/890 [01:36<00:05,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 840/890 [01:37<00:05,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 841/890 [01:37<00:05,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 842/890 [01:37<00:05,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 843/890 [01:37<00:05,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 844/890 [01:37<00:05,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 845/890 [01:37<00:05,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 846/890 [01:37<00:05,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 847/890 [01:37<00:05,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 848/890 [01:38<00:04,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 849/890 [01:38<00:04,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 850/890 [01:38<00:04,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 851/890 [01:38<00:04,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 852/890 [01:38<00:04,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 853/890 [01:38<00:04,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 854/890 [01:38<00:04,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 855/890 [01:38<00:04,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 856/890 [01:38<00:03,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 857/890 [01:39<00:03,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 858/890 [01:39<00:03,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 859/890 [01:39<00:03,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 860/890 [01:39<00:03,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 861/890 [01:39<00:03,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 862/890 [01:39<00:03,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 863/890 [01:39<00:03,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 864/890 [01:39<00:02,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 865/890 [01:39<00:02,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 866/890 [01:40<00:02,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 867/890 [01:40<00:02,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 868/890 [01:40<00:02,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 869/890 [01:40<00:02,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 870/890 [01:40<00:02,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 871/890 [01:40<00:02,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 872/890 [01:40<00:02,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 873/890 [01:40<00:01,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 874/890 [01:41<00:01,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 875/890 [01:41<00:01,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 876/890 [01:41<00:01,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 877/890 [01:41<00:01,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 878/890 [01:41<00:01,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 879/890 [01:41<00:01,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 880/890 [01:41<00:01,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 881/890 [01:41<00:01,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 882/890 [01:41<00:00,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 883/890 [01:42<00:00,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 884/890 [01:42<00:00,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 885/890 [01:42<00:00,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 886/890 [01:42<00:00,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 887/890 [01:42<00:00,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 888/890 [01:42<00:00,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 889/890 [01:42<00:00,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|██████████| 890/890 [01:42<00:00,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])





input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torc

  0%|          | 0/890 [00:00<?, ?it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 2/890 [00:00<01:15, 11.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 4/890 [00:00<01:30,  9.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 5/890 [00:00<01:35,  9.31it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 6/890 [00:00<01:37,  9.06it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 7/890 [00:00<01:39,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 8/890 [00:00<01:39,  8.86it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 9/890 [00:00<01:38,  8.91it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 10/890 [00:01<01:41,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 11/890 [00:01<01:42,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 12/890 [00:01<01:42,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 13/890 [00:01<01:42,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 14/890 [00:01<01:42,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 15/890 [00:01<01:42,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 16/890 [00:01<01:40,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 17/890 [00:01<01:40,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 18/890 [00:02<01:41,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 19/890 [00:02<01:42,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 20/890 [00:02<01:42,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 21/890 [00:02<01:41,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 22/890 [00:02<01:40,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 23/890 [00:02<01:40,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 24/890 [00:02<01:40,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 25/890 [00:02<01:41,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 26/890 [00:02<01:40,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 27/890 [00:03<01:39,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 28/890 [00:03<01:39,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 29/890 [00:03<01:40,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 30/890 [00:03<01:40,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 31/890 [00:03<01:42,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 32/890 [00:03<01:40,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 33/890 [00:03<01:39,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 34/890 [00:03<01:39,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 35/890 [00:04<01:39,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 36/890 [00:04<01:39,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 37/890 [00:04<01:40,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 38/890 [00:04<01:39,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 39/890 [00:04<01:38,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 40/890 [00:04<01:38,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 41/890 [00:04<01:39,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 42/890 [00:04<01:38,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 43/890 [00:04<01:38,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 44/890 [00:05<01:38,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 45/890 [00:05<01:37,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 46/890 [00:05<01:38,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 47/890 [00:05<01:38,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 48/890 [00:05<01:40,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 49/890 [00:05<01:39,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 50/890 [00:05<01:39,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 51/890 [00:05<01:38,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 52/890 [00:06<01:36,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 53/890 [00:06<01:37,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 54/890 [00:06<01:37,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 55/890 [00:06<01:38,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 56/890 [00:06<01:39,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 57/890 [00:06<01:38,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 58/890 [00:06<01:39,  8.36it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 59/890 [00:06<01:39,  8.36it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 60/890 [00:06<01:38,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 61/890 [00:07<01:38,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 62/890 [00:07<01:38,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 63/890 [00:07<01:37,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 64/890 [00:07<01:35,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 65/890 [00:07<01:36,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 66/890 [00:07<01:37,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 67/890 [00:07<01:37,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 68/890 [00:07<01:36,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 69/890 [00:08<01:36,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 70/890 [00:08<01:37,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 71/890 [00:08<01:35,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 72/890 [00:08<01:34,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 73/890 [00:08<01:34,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 74/890 [00:08<01:35,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 75/890 [00:08<01:35,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 76/890 [00:08<01:36,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 77/890 [00:08<01:36,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 78/890 [00:09<01:36,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 79/890 [00:09<01:35,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 80/890 [00:09<01:33,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 81/890 [00:09<01:33,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 82/890 [00:09<01:34,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 83/890 [00:09<01:35,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 84/890 [00:09<01:35,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 85/890 [00:09<01:35,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 86/890 [00:10<01:35,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 87/890 [00:10<01:35,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 88/890 [00:10<01:34,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 89/890 [00:10<01:32,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 90/890 [00:10<01:32,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 91/890 [00:10<01:32,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 92/890 [00:10<01:32,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 93/890 [00:10<01:32,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 94/890 [00:10<01:32,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 95/890 [00:11<01:32,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 96/890 [00:11<01:33,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 97/890 [00:11<01:32,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 98/890 [00:11<01:32,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 99/890 [00:11<01:32,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 100/890 [00:11<01:30,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 101/890 [00:11<01:31,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 102/890 [00:11<01:31,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 103/890 [00:11<01:31,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 104/890 [00:12<01:30,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 105/890 [00:12<01:30,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 106/890 [00:12<01:31,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 107/890 [00:12<01:31,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 108/890 [00:12<01:31,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 109/890 [00:12<01:32,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 110/890 [00:12<01:31,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 111/890 [00:12<01:30,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 112/890 [00:13<01:29,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 113/890 [00:13<01:30,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 114/890 [00:13<01:30,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 115/890 [00:13<01:30,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 116/890 [00:13<01:29,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 117/890 [00:13<01:30,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 118/890 [00:13<01:30,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 119/890 [00:13<01:30,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 120/890 [00:13<01:30,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 121/890 [00:14<01:30,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 122/890 [00:14<01:30,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 123/890 [00:14<01:28,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 124/890 [00:14<01:28,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 125/890 [00:14<01:28,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 126/890 [00:14<01:29,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 127/890 [00:14<01:29,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 128/890 [00:14<01:29,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 129/890 [00:15<01:28,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 130/890 [00:15<01:28,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 131/890 [00:15<01:28,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 132/890 [00:15<01:28,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 133/890 [00:15<01:28,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 134/890 [00:15<01:28,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 135/890 [00:15<01:27,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 136/890 [00:15<01:27,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 137/890 [00:15<01:27,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 138/890 [00:16<01:28,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 139/890 [00:16<01:28,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 140/890 [00:16<01:27,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 141/890 [00:16<01:26,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 142/890 [00:16<01:26,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 143/890 [00:16<01:26,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 144/890 [00:16<01:27,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 145/890 [00:16<01:27,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 146/890 [00:17<01:27,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 147/890 [00:17<01:27,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 148/890 [00:17<01:26,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 149/890 [00:17<01:25,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 150/890 [00:17<01:26,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 151/890 [00:17<01:26,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 152/890 [00:17<01:27,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 153/890 [00:17<01:27,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 154/890 [00:17<01:26,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 155/890 [00:18<01:25,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 156/890 [00:18<01:25,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 157/890 [00:18<01:25,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 158/890 [00:18<01:25,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 159/890 [00:18<01:25,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 160/890 [00:18<01:24,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 161/890 [00:18<01:24,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 162/890 [00:18<01:25,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 163/890 [00:18<01:24,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 164/890 [00:19<01:24,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 165/890 [00:19<01:25,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 166/890 [00:19<01:24,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 167/890 [00:19<01:23,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 168/890 [00:19<01:23,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 169/890 [00:19<01:23,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 170/890 [00:19<01:24,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 171/890 [00:19<01:24,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 172/890 [00:20<01:25,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 173/890 [00:20<01:25,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 174/890 [00:20<01:25,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 175/890 [00:20<01:25,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 176/890 [00:20<01:24,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 177/890 [00:20<01:23,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 178/890 [00:20<01:22,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 179/890 [00:20<01:23,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 180/890 [00:20<01:24,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 181/890 [00:21<01:24,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 182/890 [00:21<01:24,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 183/890 [00:21<01:23,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 184/890 [00:21<01:23,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 185/890 [00:21<01:23,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 186/890 [00:21<01:21,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 187/890 [00:21<01:20,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 188/890 [00:21<01:21,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 189/890 [00:22<01:22,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 190/890 [00:22<01:23,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 191/890 [00:22<01:22,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 192/890 [00:22<01:20,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 193/890 [00:22<01:20,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 194/890 [00:22<01:21,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 195/890 [00:22<01:21,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 196/890 [00:22<01:21,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 197/890 [00:22<01:21,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 198/890 [00:23<01:22,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 199/890 [00:23<01:19,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 200/890 [00:23<01:19,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 201/890 [00:23<01:20,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 202/890 [00:23<01:21,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 203/890 [00:23<01:20,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 204/890 [00:23<01:20,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 205/890 [00:23<01:19,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 206/890 [00:24<01:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 207/890 [00:24<01:19,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 208/890 [00:24<01:18,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 209/890 [00:24<01:17,  8.74it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 210/890 [00:24<01:18,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 211/890 [00:24<01:19,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 212/890 [00:24<01:19,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 213/890 [00:24<01:19,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 214/890 [00:24<01:19,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 215/890 [00:25<01:18,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 216/890 [00:25<01:18,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 217/890 [00:25<01:17,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 218/890 [00:25<01:18,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 219/890 [00:25<01:18,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 220/890 [00:25<01:19,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 221/890 [00:25<01:19,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 222/890 [00:25<01:19,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 223/890 [00:26<01:19,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 224/890 [00:26<01:18,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 225/890 [00:26<01:16,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 226/890 [00:26<01:17,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 227/890 [00:26<01:17,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 228/890 [00:26<01:17,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 229/890 [00:26<01:17,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 230/890 [00:26<01:17,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 231/890 [00:26<01:16,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 232/890 [00:27<01:16,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 233/890 [00:27<01:16,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 234/890 [00:27<01:16,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 235/890 [00:27<01:17,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 236/890 [00:27<01:16,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 237/890 [00:27<01:15,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 238/890 [00:27<01:16,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 239/890 [00:27<01:15,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 240/890 [00:28<01:15,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 241/890 [00:28<01:16,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 242/890 [00:28<01:16,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 243/890 [00:28<01:15,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 244/890 [00:28<01:14,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 245/890 [00:28<01:14,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 246/890 [00:28<01:15,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 247/890 [00:28<01:15,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 248/890 [00:28<01:15,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 249/890 [00:29<01:15,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 250/890 [00:29<01:14,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 251/890 [00:29<01:14,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 252/890 [00:29<01:14,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 253/890 [00:29<01:14,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 254/890 [00:29<01:14,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 255/890 [00:29<01:14,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 256/890 [00:29<01:14,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 257/890 [00:29<01:13,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 258/890 [00:30<01:14,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 259/890 [00:30<01:13,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 260/890 [00:30<01:13,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 261/890 [00:30<01:14,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 262/890 [00:30<01:13,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 263/890 [00:30<01:12,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 264/890 [00:30<01:12,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 265/890 [00:30<01:12,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 266/890 [00:31<01:13,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 267/890 [00:31<01:13,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 268/890 [00:31<01:13,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 269/890 [00:31<01:13,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 270/890 [00:31<01:13,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 271/890 [00:31<01:12,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 272/890 [00:31<01:11,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 273/890 [00:31<01:13,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 274/890 [00:31<01:11,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 275/890 [00:32<01:11,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 276/890 [00:32<01:10,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 277/890 [00:32<01:10,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 278/890 [00:32<01:10,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 279/890 [00:32<01:11,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 280/890 [00:32<01:11,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 281/890 [00:32<01:11,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 282/890 [00:32<01:10,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 283/890 [00:33<01:10,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 284/890 [00:33<01:10,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 285/890 [00:33<01:10,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 286/890 [00:33<01:11,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 287/890 [00:33<01:11,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 288/890 [00:33<01:11,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 289/890 [00:33<01:11,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 290/890 [00:33<01:11,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 291/890 [00:33<01:09,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 292/890 [00:34<01:09,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 293/890 [00:34<01:09,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 294/890 [00:34<01:09,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 295/890 [00:34<01:10,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 296/890 [00:34<01:09,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 297/890 [00:34<01:08,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 298/890 [00:34<01:08,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 299/890 [00:34<01:09,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 300/890 [00:35<01:09,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 301/890 [00:35<01:09,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 302/890 [00:35<01:09,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 303/890 [00:35<01:08,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 304/890 [00:35<01:07,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 305/890 [00:35<01:08,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 306/890 [00:35<01:08,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 307/890 [00:35<01:08,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 308/890 [00:35<01:08,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 309/890 [00:36<01:07,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 310/890 [00:36<01:06,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 311/890 [00:36<01:07,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 312/890 [00:36<01:07,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 313/890 [00:36<01:07,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 314/890 [00:36<01:06,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 315/890 [00:36<01:06,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 316/890 [00:36<01:07,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 317/890 [00:37<01:07,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 318/890 [00:37<01:07,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 319/890 [00:37<01:07,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 320/890 [00:37<01:07,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 321/890 [00:37<01:07,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 322/890 [00:37<01:05,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 323/890 [00:37<01:05,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 324/890 [00:37<01:06,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 325/890 [00:37<01:06,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 326/890 [00:38<01:06,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 327/890 [00:38<01:06,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 328/890 [00:38<01:06,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 329/890 [00:38<01:06,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 330/890 [00:38<01:04,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 331/890 [00:38<01:04,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 332/890 [00:38<01:04,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 333/890 [00:38<01:05,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 334/890 [00:38<01:04,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 335/890 [00:39<01:04,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 336/890 [00:39<01:04,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 337/890 [00:39<01:04,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 338/890 [00:39<01:04,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 339/890 [00:39<01:05,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 340/890 [00:39<01:05,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 341/890 [00:39<01:04,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 342/890 [00:39<01:04,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 343/890 [00:40<01:03,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 344/890 [00:40<01:03,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 345/890 [00:40<01:03,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 346/890 [00:40<01:03,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 347/890 [00:40<01:03,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 348/890 [00:40<01:03,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 349/890 [00:40<01:02,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 350/890 [00:40<01:02,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 351/890 [00:40<01:03,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 352/890 [00:41<01:02,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 353/890 [00:41<01:02,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 354/890 [00:41<01:03,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 355/890 [00:41<01:03,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 356/890 [00:41<01:02,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 357/890 [00:41<01:01,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 358/890 [00:41<01:01,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 359/890 [00:41<01:02,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 360/890 [00:42<01:02,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 361/890 [00:42<01:02,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 362/890 [00:42<01:02,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 363/890 [00:42<01:02,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 364/890 [00:42<01:02,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 365/890 [00:42<01:01,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 366/890 [00:42<01:00,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 367/890 [00:42<01:00,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 368/890 [00:42<01:01,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 369/890 [00:43<01:01,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 370/890 [00:43<01:01,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 371/890 [00:43<01:01,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 372/890 [00:43<01:01,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 373/890 [00:43<01:00,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 374/890 [00:43<00:59,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 375/890 [00:43<00:59,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 376/890 [00:43<01:00,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 377/890 [00:44<01:00,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 378/890 [00:44<01:00,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 379/890 [00:44<01:00,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 380/890 [00:44<01:00,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 381/890 [00:44<00:59,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 382/890 [00:44<00:58,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 383/890 [00:44<00:58,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 384/890 [00:44<00:58,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 385/890 [00:44<00:58,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 386/890 [00:45<00:58,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 387/890 [00:45<00:58,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 388/890 [00:45<00:58,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 389/890 [00:45<00:58,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 390/890 [00:45<00:58,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 391/890 [00:45<00:58,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 392/890 [00:45<00:57,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 393/890 [00:45<00:57,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 394/890 [00:46<00:57,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 395/890 [00:46<00:58,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 396/890 [00:46<00:58,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 397/890 [00:46<00:58,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 398/890 [00:46<00:58,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 399/890 [00:46<00:56,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 400/890 [00:46<00:56,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 401/890 [00:46<00:56,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 402/890 [00:46<00:57,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 403/890 [00:47<00:57,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 404/890 [00:47<00:57,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 405/890 [00:47<00:57,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 406/890 [00:47<00:58,  8.26it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 407/890 [00:47<00:57,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 408/890 [00:47<00:57,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 409/890 [00:47<00:55,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 410/890 [00:47<00:55,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 411/890 [00:48<00:55,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 412/890 [00:48<00:56,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 413/890 [00:48<00:56,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 414/890 [00:48<00:56,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 415/890 [00:48<00:56,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 416/890 [00:48<00:56,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 417/890 [00:48<00:55,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 418/890 [00:48<00:54,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 419/890 [00:48<00:54,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 420/890 [00:49<00:54,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 421/890 [00:49<00:54,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 422/890 [00:49<00:54,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 423/890 [00:49<00:53,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 424/890 [00:49<00:53,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 425/890 [00:49<00:54,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 426/890 [00:49<00:54,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 427/890 [00:49<00:54,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 428/890 [00:50<00:54,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 429/890 [00:50<00:55,  8.34it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 430/890 [00:50<00:54,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 431/890 [00:50<00:53,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 432/890 [00:50<00:52,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 433/890 [00:50<00:53,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 434/890 [00:50<00:53,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 435/890 [00:50<00:53,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 436/890 [00:50<00:53,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 437/890 [00:51<00:53,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 438/890 [00:51<00:52,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 439/890 [00:51<00:52,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 440/890 [00:51<00:52,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 441/890 [00:51<00:52,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 442/890 [00:51<00:52,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 443/890 [00:51<00:51,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 444/890 [00:51<00:51,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 445/890 [00:52<00:52,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 446/890 [00:52<00:52,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 447/890 [00:52<00:52,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 448/890 [00:52<00:51,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 449/890 [00:52<00:50,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 450/890 [00:52<00:50,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 451/890 [00:52<00:51,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 452/890 [00:52<00:50,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 453/890 [00:52<00:51,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 454/890 [00:53<00:51,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 455/890 [00:53<00:50,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 456/890 [00:53<00:50,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 457/890 [00:53<00:50,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 458/890 [00:53<00:50,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 459/890 [00:53<00:50,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 460/890 [00:53<00:49,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 461/890 [00:53<00:49,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 462/890 [00:53<00:49,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 463/890 [00:54<00:49,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 464/890 [00:54<00:49,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 465/890 [00:54<00:49,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 466/890 [00:54<00:48,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 467/890 [00:54<00:49,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 468/890 [00:54<00:48,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 469/890 [00:54<00:48,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 470/890 [00:54<00:48,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 471/890 [00:55<00:48,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 472/890 [00:55<00:48,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 473/890 [00:55<00:48,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 474/890 [00:55<00:48,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 475/890 [00:55<00:49,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 476/890 [00:55<00:48,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 477/890 [00:55<00:48,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 478/890 [00:55<00:48,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 479/890 [00:55<00:47,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 480/890 [00:56<00:47,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 481/890 [00:56<00:47,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 482/890 [00:56<00:48,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 483/890 [00:56<00:48,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 484/890 [00:56<00:48,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 485/890 [00:56<00:48,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 486/890 [00:56<00:47,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 487/890 [00:56<00:46,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 488/890 [00:57<00:47,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 489/890 [00:57<00:46,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 490/890 [00:57<00:46,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 491/890 [00:57<00:46,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 492/890 [00:57<00:47,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 493/890 [00:57<00:47,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 494/890 [00:57<00:47,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 495/890 [00:57<00:46,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 496/890 [00:57<00:45,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 497/890 [00:58<00:45,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 498/890 [00:58<00:45,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 499/890 [00:58<00:45,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 500/890 [00:58<00:45,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 501/890 [00:58<00:46,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 502/890 [00:58<00:45,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 503/890 [00:58<00:45,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 504/890 [00:58<00:44,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 505/890 [00:59<00:45,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 506/890 [00:59<00:45,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 507/890 [00:59<00:45,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 508/890 [00:59<00:44,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 509/890 [00:59<00:45,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 510/890 [00:59<00:45,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 511/890 [00:59<00:45,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 512/890 [00:59<00:44,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 513/890 [00:59<00:43,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 514/890 [01:00<00:43,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 515/890 [01:00<00:43,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 516/890 [01:00<00:44,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 517/890 [01:00<00:44,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 518/890 [01:00<00:44,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 519/890 [01:00<00:44,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 520/890 [01:00<00:44,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 521/890 [01:00<00:43,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 522/890 [01:01<00:42,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 523/890 [01:01<00:42,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 524/890 [01:01<00:42,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 525/890 [01:01<00:42,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 526/890 [01:01<00:42,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 527/890 [01:01<00:42,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 528/890 [01:01<00:43,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 529/890 [01:01<00:43,  8.34it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 530/890 [01:01<00:42,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 531/890 [01:02<00:42,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 532/890 [01:02<00:41,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 533/890 [01:02<00:41,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 534/890 [01:02<00:41,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 535/890 [01:02<00:41,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 536/890 [01:02<00:41,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 537/890 [01:02<00:41,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 538/890 [01:02<00:40,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 539/890 [01:03<00:41,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 540/890 [01:03<00:40,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 541/890 [01:03<00:40,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 542/890 [01:03<00:40,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 543/890 [01:03<00:39,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 544/890 [01:03<00:40,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 545/890 [01:03<00:40,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 546/890 [01:03<00:40,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 547/890 [01:03<00:40,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 548/890 [01:04<00:40,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 549/890 [01:04<00:40,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 550/890 [01:04<00:40,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 551/890 [01:04<00:39,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 552/890 [01:04<00:39,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 553/890 [01:04<00:39,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 554/890 [01:04<00:39,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 555/890 [01:04<00:39,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 556/890 [01:05<00:39,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 557/890 [01:05<00:38,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 558/890 [01:05<00:38,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 559/890 [01:05<00:38,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 560/890 [01:05<00:38,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 561/890 [01:05<00:39,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 562/890 [01:05<00:39,  8.37it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 563/890 [01:05<00:38,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 564/890 [01:05<00:38,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 565/890 [01:06<00:38,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 566/890 [01:06<00:38,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 567/890 [01:06<00:37,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 568/890 [01:06<00:37,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 569/890 [01:06<00:37,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 570/890 [01:06<00:37,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 571/890 [01:06<00:37,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 572/890 [01:06<00:38,  8.33it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 573/890 [01:07<00:37,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 574/890 [01:07<00:37,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 575/890 [01:07<00:36,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 576/890 [01:07<00:36,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 577/890 [01:07<00:36,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 578/890 [01:07<00:36,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 579/890 [01:07<00:36,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 580/890 [01:07<00:36,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 581/890 [01:07<00:36,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 582/890 [01:08<00:35,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 583/890 [01:08<00:35,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 584/890 [01:08<00:35,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 585/890 [01:08<00:35,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 586/890 [01:08<00:35,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 587/890 [01:08<00:35,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 588/890 [01:08<00:35,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 589/890 [01:08<00:34,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 590/890 [01:08<00:35,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 591/890 [01:09<00:35,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 592/890 [01:09<00:34,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 593/890 [01:09<00:35,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 594/890 [01:09<00:35,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 595/890 [01:09<00:34,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 596/890 [01:09<00:34,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 597/890 [01:09<00:33,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 598/890 [01:09<00:33,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 599/890 [01:10<00:34,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 600/890 [01:10<00:33,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 601/890 [01:10<00:33,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 602/890 [01:10<00:33,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 603/890 [01:10<00:33,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 604/890 [01:10<00:33,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 605/890 [01:10<00:33,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 606/890 [01:10<00:33,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 607/890 [01:10<00:33,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 608/890 [01:11<00:33,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 609/890 [01:11<00:32,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 610/890 [01:11<00:32,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 611/890 [01:11<00:32,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 612/890 [01:11<00:32,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 613/890 [01:11<00:32,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 614/890 [01:11<00:32,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 615/890 [01:11<00:32,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 616/890 [01:12<00:31,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 617/890 [01:12<00:31,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 618/890 [01:12<00:31,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 619/890 [01:12<00:31,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 620/890 [01:12<00:30,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 621/890 [01:12<00:31,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 622/890 [01:12<00:31,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 623/890 [01:12<00:31,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 624/890 [01:12<00:31,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 625/890 [01:13<00:31,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 626/890 [01:13<00:31,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 627/890 [01:13<00:30,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 628/890 [01:13<00:30,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 629/890 [01:13<00:30,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 630/890 [01:13<00:30,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 631/890 [01:13<00:30,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 632/890 [01:13<00:30,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 633/890 [01:14<00:29,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 634/890 [01:14<00:29,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 635/890 [01:14<00:29,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 636/890 [01:14<00:29,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 637/890 [01:14<00:29,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 638/890 [01:14<00:29,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 639/890 [01:14<00:29,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 640/890 [01:14<00:29,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 641/890 [01:14<00:28,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 642/890 [01:15<00:28,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 643/890 [01:15<00:28,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 644/890 [01:15<00:29,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 645/890 [01:15<00:28,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 646/890 [01:15<00:28,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 647/890 [01:15<00:28,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 648/890 [01:15<00:28,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 649/890 [01:15<00:27,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 650/890 [01:16<00:27,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 651/890 [01:16<00:27,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 652/890 [01:16<00:27,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 653/890 [01:16<00:28,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 654/890 [01:16<00:27,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 655/890 [01:16<00:27,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 656/890 [01:16<00:27,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 657/890 [01:16<00:27,  8.37it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 658/890 [01:16<00:27,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 659/890 [01:17<00:27,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 660/890 [01:17<00:27,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 661/890 [01:17<00:26,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 662/890 [01:17<00:26,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 663/890 [01:17<00:26,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 664/890 [01:17<00:26,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 665/890 [01:17<00:26,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 666/890 [01:17<00:26,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 667/890 [01:18<00:26,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 668/890 [01:18<00:26,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 669/890 [01:18<00:26,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 670/890 [01:18<00:26,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 671/890 [01:18<00:25,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 672/890 [01:18<00:25,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 673/890 [01:18<00:25,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 674/890 [01:18<00:25,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 675/890 [01:18<00:25,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 676/890 [01:19<00:25,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 677/890 [01:19<00:25,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 678/890 [01:19<00:24,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 679/890 [01:19<00:24,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 680/890 [01:19<00:24,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 681/890 [01:19<00:24,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 682/890 [01:19<00:24,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 683/890 [01:19<00:24,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 684/890 [01:20<00:24,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 685/890 [01:20<00:24,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 686/890 [01:20<00:23,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 687/890 [01:20<00:23,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 688/890 [01:20<00:23,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 689/890 [01:20<00:23,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 690/890 [01:20<00:23,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 691/890 [01:20<00:23,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 692/890 [01:20<00:23,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 693/890 [01:21<00:23,  8.37it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 694/890 [01:21<00:23,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 695/890 [01:21<00:22,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 696/890 [01:21<00:22,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 697/890 [01:21<00:22,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 698/890 [01:21<00:22,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 699/890 [01:21<00:22,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 700/890 [01:21<00:22,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 701/890 [01:22<00:22,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 702/890 [01:22<00:21,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 703/890 [01:22<00:21,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 704/890 [01:22<00:21,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 705/890 [01:22<00:21,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 706/890 [01:22<00:21,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 707/890 [01:22<00:21,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 708/890 [01:22<00:21,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 709/890 [01:22<00:21,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 710/890 [01:23<00:20,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 711/890 [01:23<00:21,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 712/890 [01:23<00:20,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 713/890 [01:23<00:20,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 714/890 [01:23<00:20,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 715/890 [01:23<00:20,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 716/890 [01:23<00:20,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 717/890 [01:23<00:20,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 718/890 [01:24<00:20,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 719/890 [01:24<00:20,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 720/890 [01:24<00:20,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 721/890 [01:24<00:20,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 722/890 [01:24<00:19,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 723/890 [01:24<00:19,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 724/890 [01:24<00:19,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 725/890 [01:24<00:19,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 726/890 [01:24<00:19,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 727/890 [01:25<00:19,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 728/890 [01:25<00:19,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 729/890 [01:25<00:19,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 730/890 [01:25<00:19,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 731/890 [01:25<00:18,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 732/890 [01:25<00:18,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 733/890 [01:25<00:18,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 734/890 [01:25<00:18,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 735/890 [01:26<00:18,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 736/890 [01:26<00:18,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 737/890 [01:26<00:18,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 738/890 [01:26<00:18,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 739/890 [01:26<00:17,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 740/890 [01:26<00:17,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 741/890 [01:26<00:17,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 742/890 [01:26<00:17,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 743/890 [01:26<00:17,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 744/890 [01:27<00:17,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 745/890 [01:27<00:17,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 746/890 [01:27<00:17,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 747/890 [01:27<00:17,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 748/890 [01:27<00:16,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 749/890 [01:27<00:16,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 750/890 [01:27<00:16,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 751/890 [01:27<00:16,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 752/890 [01:28<00:16,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 753/890 [01:28<00:16,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 754/890 [01:28<00:16,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 755/890 [01:28<00:15,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 756/890 [01:28<00:15,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 757/890 [01:28<00:15,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 758/890 [01:28<00:15,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 759/890 [01:28<00:15,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 760/890 [01:28<00:15,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 761/890 [01:29<00:15,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 762/890 [01:29<00:14,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 763/890 [01:29<00:14,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 764/890 [01:29<00:14,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 765/890 [01:29<00:14,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 766/890 [01:29<00:14,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 767/890 [01:29<00:14,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 768/890 [01:29<00:14,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 769/890 [01:29<00:14,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 770/890 [01:30<00:14,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 771/890 [01:30<00:13,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 772/890 [01:30<00:13,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 773/890 [01:30<00:13,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 774/890 [01:30<00:13,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 775/890 [01:30<00:13,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 776/890 [01:30<00:13,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 777/890 [01:30<00:13,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 778/890 [01:31<00:13,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 779/890 [01:31<00:13,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 780/890 [01:31<00:12,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 781/890 [01:31<00:12,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 782/890 [01:31<00:12,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 783/890 [01:31<00:12,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 784/890 [01:31<00:12,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 785/890 [01:31<00:12,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 786/890 [01:31<00:12,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 787/890 [01:32<00:12,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 788/890 [01:32<00:12,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 789/890 [01:32<00:12,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 790/890 [01:32<00:11,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 791/890 [01:32<00:11,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 792/890 [01:32<00:11,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 793/890 [01:32<00:11,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 794/890 [01:32<00:11,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 795/890 [01:33<00:11,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 796/890 [01:33<00:11,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 797/890 [01:33<00:10,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 798/890 [01:33<00:10,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 799/890 [01:33<00:10,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 800/890 [01:33<00:10,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 801/890 [01:33<00:10,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 802/890 [01:33<00:10,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 803/890 [01:33<00:10,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 804/890 [01:34<00:10,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 805/890 [01:34<00:10,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 806/890 [01:34<00:09,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 807/890 [01:34<00:09,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 808/890 [01:34<00:09,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 809/890 [01:34<00:09,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 810/890 [01:34<00:09,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 811/890 [01:34<00:09,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 812/890 [01:35<00:09,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 813/890 [01:35<00:09,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 814/890 [01:35<00:09,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 815/890 [01:35<00:08,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 816/890 [01:35<00:08,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 817/890 [01:35<00:08,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 818/890 [01:35<00:08,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 819/890 [01:35<00:08,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 820/890 [01:35<00:08,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 821/890 [01:36<00:08,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 822/890 [01:36<00:07,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 823/890 [01:36<00:07,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 824/890 [01:36<00:07,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 825/890 [01:36<00:07,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 826/890 [01:36<00:07,  8.24it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 827/890 [01:36<00:07,  8.15it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 828/890 [01:36<00:07,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 829/890 [01:37<00:07,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 830/890 [01:37<00:07,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 831/890 [01:37<00:06,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 832/890 [01:37<00:06,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 833/890 [01:37<00:06,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 834/890 [01:37<00:06,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 835/890 [01:37<00:06,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 836/890 [01:37<00:06,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 837/890 [01:38<00:06,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 838/890 [01:38<00:06,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 839/890 [01:38<00:05,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 840/890 [01:38<00:05,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 841/890 [01:38<00:05,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 842/890 [01:38<00:05,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 843/890 [01:38<00:05,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 844/890 [01:38<00:05,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 845/890 [01:38<00:05,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 846/890 [01:39<00:05,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 847/890 [01:39<00:05,  7.36it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 849/890 [01:39<00:04,  8.32it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 850/890 [01:39<00:04,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 851/890 [01:39<00:04,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 852/890 [01:39<00:04,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 853/890 [01:39<00:04,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 854/890 [01:40<00:04,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 855/890 [01:40<00:04,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 856/890 [01:40<00:04,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 857/890 [01:40<00:03,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 858/890 [01:40<00:03,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 859/890 [01:40<00:03,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 860/890 [01:40<00:03,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 861/890 [01:40<00:03,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 862/890 [01:40<00:03,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 863/890 [01:41<00:03,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 864/890 [01:41<00:03,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 865/890 [01:41<00:02,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 866/890 [01:41<00:02,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 867/890 [01:41<00:02,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 868/890 [01:41<00:02,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 869/890 [01:41<00:02,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 870/890 [01:41<00:02,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 871/890 [01:42<00:02,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 872/890 [01:42<00:02,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 873/890 [01:42<00:01,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 874/890 [01:42<00:01,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 875/890 [01:42<00:01,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 876/890 [01:42<00:01,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 877/890 [01:42<00:01,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 878/890 [01:42<00:01,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 879/890 [01:42<00:01,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 880/890 [01:43<00:01,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 881/890 [01:43<00:01,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 882/890 [01:43<00:00,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 883/890 [01:43<00:00,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 884/890 [01:43<00:00,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 885/890 [01:43<00:00,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 886/890 [01:43<00:00,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 887/890 [01:43<00:00,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 888/890 [01:44<00:00,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 889/890 [01:44<00:00,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|██████████| 890/890 [01:44<00:00,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])





input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torc

  0%|          | 0/890 [00:00<?, ?it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 2/890 [00:00<01:16, 11.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  0%|          | 4/890 [00:00<01:30,  9.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 5/890 [00:00<01:34,  9.34it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 6/890 [00:00<01:37,  9.04it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 7/890 [00:00<01:40,  8.75it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 8/890 [00:00<01:39,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 9/890 [00:00<01:39,  8.87it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 10/890 [00:01<01:42,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|          | 11/890 [00:01<01:41,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 12/890 [00:01<01:43,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  1%|▏         | 13/890 [00:01<01:43,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 14/890 [00:01<01:41,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 15/890 [00:01<01:41,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 16/890 [00:01<01:40,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 17/890 [00:01<01:40,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 18/890 [00:02<01:40,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 19/890 [00:02<01:41,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 20/890 [00:02<01:40,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 21/890 [00:02<01:40,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  2%|▏         | 22/890 [00:02<01:40,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 23/890 [00:02<01:41,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 24/890 [00:02<01:41,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 25/890 [00:02<01:41,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 26/890 [00:02<01:42,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 27/890 [00:03<01:41,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 28/890 [00:03<01:40,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 29/890 [00:03<01:39,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 30/890 [00:03<01:40,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  3%|▎         | 31/890 [00:03<01:40,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 32/890 [00:03<01:40,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▎         | 33/890 [00:03<01:40,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 34/890 [00:03<01:38,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 35/890 [00:04<01:38,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 36/890 [00:04<01:38,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 37/890 [00:04<01:38,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 38/890 [00:04<01:37,  8.76it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 39/890 [00:04<01:39,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  4%|▍         | 40/890 [00:04<01:38,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 41/890 [00:04<01:40,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 42/890 [00:04<01:38,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 43/890 [00:04<01:38,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▍         | 44/890 [00:05<01:37,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 45/890 [00:05<01:38,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 46/890 [00:05<01:38,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 47/890 [00:05<01:38,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  5%|▌         | 48/890 [00:05<01:39,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 49/890 [00:05<01:39,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 50/890 [00:05<01:39,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 51/890 [00:05<01:39,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 52/890 [00:06<01:38,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 53/890 [00:06<01:36,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 54/890 [00:06<01:37,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▌         | 55/890 [00:06<01:37,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 56/890 [00:06<01:38,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  6%|▋         | 57/890 [00:06<01:38,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 58/890 [00:06<01:38,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 59/890 [00:06<01:36,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 60/890 [00:06<01:36,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 61/890 [00:07<01:35,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 62/890 [00:07<01:36,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 63/890 [00:07<01:35,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 64/890 [00:07<01:34,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 65/890 [00:07<01:35,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  7%|▋         | 66/890 [00:07<01:36,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 67/890 [00:07<01:36,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 68/890 [00:07<01:36,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 69/890 [00:07<01:37,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 70/890 [00:08<01:36,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 71/890 [00:08<01:37,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 72/890 [00:08<01:33,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 73/890 [00:08<01:35,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 74/890 [00:08<01:35,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  8%|▊         | 75/890 [00:08<01:36,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 76/890 [00:08<01:35,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▊         | 77/890 [00:08<01:35,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 78/890 [00:09<01:35,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 79/890 [00:09<01:34,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 80/890 [00:09<01:33,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 81/890 [00:09<01:33,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 82/890 [00:09<01:33,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 83/890 [00:09<01:32,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


  9%|▉         | 84/890 [00:09<01:34,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 85/890 [00:09<01:33,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 86/890 [00:09<01:33,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 87/890 [00:10<01:33,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|▉         | 88/890 [00:10<01:34,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 89/890 [00:10<01:32,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 90/890 [00:10<01:31,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 91/890 [00:10<01:32,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 92/890 [00:10<01:33,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 10%|█         | 93/890 [00:10<01:33,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 94/890 [00:10<01:34,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 95/890 [00:11<01:34,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 96/890 [00:11<01:34,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 97/890 [00:11<01:34,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 98/890 [00:11<01:33,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 99/890 [00:11<01:31,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█         | 100/890 [00:11<01:31,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 101/890 [00:11<01:32,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 11%|█▏        | 102/890 [00:11<01:32,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 103/890 [00:11<01:32,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 104/890 [00:12<01:31,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 105/890 [00:12<01:31,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 106/890 [00:12<01:30,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 107/890 [00:12<01:31,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 108/890 [00:12<01:31,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 109/890 [00:12<01:31,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 110/890 [00:12<01:30,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 12%|█▏        | 111/890 [00:12<01:31,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 112/890 [00:13<01:30,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 113/890 [00:13<01:31,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 114/890 [00:13<01:31,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 115/890 [00:13<01:31,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 116/890 [00:13<01:31,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 117/890 [00:13<01:30,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 118/890 [00:13<01:29,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 119/890 [00:13<01:29,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 13%|█▎        | 120/890 [00:13<01:30,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 121/890 [00:14<01:30,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▎        | 122/890 [00:14<01:30,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 123/890 [00:14<01:30,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 124/890 [00:14<01:29,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 125/890 [00:14<01:28,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 126/890 [00:14<01:28,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 127/890 [00:14<01:29,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 128/890 [00:14<01:29,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 14%|█▍        | 129/890 [00:15<01:29,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 130/890 [00:15<01:30,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 131/890 [00:15<01:30,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 132/890 [00:15<01:30,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▍        | 133/890 [00:15<01:29,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 134/890 [00:15<01:27,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 135/890 [00:15<01:27,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 136/890 [00:15<01:28,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 15%|█▌        | 137/890 [00:15<01:28,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 138/890 [00:16<01:29,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 139/890 [00:16<01:29,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 140/890 [00:16<01:28,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 141/890 [00:16<01:28,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 142/890 [00:16<01:27,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 143/890 [00:16<01:25,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▌        | 144/890 [00:16<01:26,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 145/890 [00:16<01:27,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 16%|█▋        | 146/890 [00:17<01:27,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 147/890 [00:17<01:27,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 148/890 [00:17<01:26,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 149/890 [00:17<01:26,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 150/890 [00:17<01:25,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 151/890 [00:17<01:26,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 152/890 [00:17<01:26,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 153/890 [00:17<01:26,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 154/890 [00:17<01:26,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 17%|█▋        | 155/890 [00:18<01:24,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 156/890 [00:18<01:25,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 157/890 [00:18<01:26,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 158/890 [00:18<01:25,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 159/890 [00:18<01:24,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 160/890 [00:18<01:23,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 161/890 [00:18<01:24,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 162/890 [00:18<01:25,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 163/890 [00:18<01:25,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 18%|█▊        | 164/890 [00:19<01:25,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 165/890 [00:19<01:25,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▊        | 166/890 [00:19<01:23,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 167/890 [00:19<01:23,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 168/890 [00:19<01:23,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 169/890 [00:19<01:24,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 170/890 [00:19<01:24,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 171/890 [00:19<01:24,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 172/890 [00:20<01:23,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 19%|█▉        | 173/890 [00:20<01:22,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 174/890 [00:20<01:23,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 175/890 [00:20<01:23,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 176/890 [00:20<01:23,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|█▉        | 177/890 [00:20<01:23,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 178/890 [00:20<01:24,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 179/890 [00:20<01:24,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 180/890 [00:20<01:24,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 181/890 [00:21<01:24,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 20%|██        | 182/890 [00:21<01:23,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 183/890 [00:21<01:22,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 184/890 [00:21<01:22,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 185/890 [00:21<01:22,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 186/890 [00:21<01:22,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 187/890 [00:21<01:22,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 188/890 [00:21<01:21,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██        | 189/890 [00:22<01:21,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 190/890 [00:22<01:22,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 21%|██▏       | 191/890 [00:22<01:21,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 192/890 [00:22<01:21,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 193/890 [00:22<01:22,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 194/890 [00:22<01:20,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 195/890 [00:22<01:20,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 196/890 [00:22<01:20,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 197/890 [00:22<01:20,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 198/890 [00:23<01:21,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 199/890 [00:23<01:21,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 22%|██▏       | 200/890 [00:23<01:20,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 201/890 [00:23<01:19,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 202/890 [00:23<01:19,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 203/890 [00:23<01:19,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 204/890 [00:23<01:20,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 205/890 [00:23<01:19,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 206/890 [00:24<01:19,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 207/890 [00:24<01:19,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 208/890 [00:24<01:19,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 23%|██▎       | 209/890 [00:24<01:19,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 210/890 [00:24<01:19,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▎       | 211/890 [00:24<01:20,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 212/890 [00:24<01:20,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 213/890 [00:24<01:19,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 214/890 [00:24<01:18,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 215/890 [00:25<01:18,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 216/890 [00:25<01:19,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 217/890 [00:25<01:19,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 24%|██▍       | 218/890 [00:25<01:19,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 219/890 [00:25<01:20,  8.36it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 220/890 [00:25<01:19,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 221/890 [00:25<01:19,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▍       | 222/890 [00:25<01:19,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 223/890 [00:26<01:17,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 224/890 [00:26<01:17,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 225/890 [00:26<01:17,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 25%|██▌       | 226/890 [00:26<01:17,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 227/890 [00:26<01:17,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 228/890 [00:26<01:17,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 229/890 [00:26<01:18,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 230/890 [00:26<01:16,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 231/890 [00:26<01:16,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 232/890 [00:27<01:16,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▌       | 233/890 [00:27<01:16,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 234/890 [00:27<01:17,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 26%|██▋       | 235/890 [00:27<01:16,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 236/890 [00:27<01:16,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 237/890 [00:27<01:15,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 238/890 [00:27<01:16,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 239/890 [00:27<01:17,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 240/890 [00:28<01:16,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 241/890 [00:28<01:16,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 242/890 [00:28<01:16,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 243/890 [00:28<01:16,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 27%|██▋       | 244/890 [00:28<01:15,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 245/890 [00:28<01:14,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 246/890 [00:28<01:14,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 247/890 [00:28<01:15,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 248/890 [00:28<01:15,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 249/890 [00:29<01:15,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 250/890 [00:29<01:16,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 251/890 [00:29<01:15,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 252/890 [00:29<01:15,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 28%|██▊       | 253/890 [00:29<01:14,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 254/890 [00:29<01:13,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▊       | 255/890 [00:29<01:14,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 256/890 [00:29<01:15,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 257/890 [00:30<01:14,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 258/890 [00:30<01:15,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 259/890 [00:30<01:14,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 260/890 [00:30<01:14,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 261/890 [00:30<01:12,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 29%|██▉       | 262/890 [00:30<01:12,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 263/890 [00:30<01:13,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 264/890 [00:30<01:13,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 265/890 [00:30<01:13,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|██▉       | 266/890 [00:31<01:13,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 267/890 [00:31<01:14,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 268/890 [00:31<01:14,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 269/890 [00:31<01:13,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 270/890 [00:31<01:12,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 30%|███       | 271/890 [00:31<01:12,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 272/890 [00:31<01:12,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 273/890 [00:31<01:12,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 274/890 [00:32<01:12,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 275/890 [00:32<01:13,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 276/890 [00:32<01:12,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 277/890 [00:32<01:12,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███       | 278/890 [00:32<01:11,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 279/890 [00:32<01:10,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 31%|███▏      | 280/890 [00:32<01:10,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 281/890 [00:32<01:10,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 282/890 [00:32<01:11,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 283/890 [00:33<01:11,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 284/890 [00:33<01:11,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 285/890 [00:33<01:11,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 286/890 [00:33<01:10,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 287/890 [00:33<01:09,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 288/890 [00:33<01:10,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 32%|███▏      | 289/890 [00:33<01:10,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 290/890 [00:33<01:10,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 291/890 [00:34<01:10,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 292/890 [00:34<01:11,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 293/890 [00:34<01:11,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 294/890 [00:34<01:10,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 295/890 [00:34<01:10,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 296/890 [00:34<01:09,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 297/890 [00:34<01:08,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 33%|███▎      | 298/890 [00:34<01:09,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 299/890 [00:34<01:09,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▎      | 300/890 [00:35<01:09,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 301/890 [00:35<01:08,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 302/890 [00:35<01:07,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 303/890 [00:35<01:08,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 304/890 [00:35<01:08,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 305/890 [00:35<01:08,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 306/890 [00:35<01:10,  8.26it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 34%|███▍      | 307/890 [00:35<01:08,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 308/890 [00:36<01:08,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 309/890 [00:36<01:08,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 310/890 [00:36<01:07,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▍      | 311/890 [00:36<01:06,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 312/890 [00:36<01:07,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 313/890 [00:36<01:07,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 314/890 [00:36<01:06,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 35%|███▌      | 315/890 [00:36<01:06,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 316/890 [00:36<01:07,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 317/890 [00:37<01:06,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 318/890 [00:37<01:07,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 319/890 [00:37<01:07,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 320/890 [00:37<01:06,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 321/890 [00:37<01:05,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▌      | 322/890 [00:37<01:05,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 323/890 [00:37<01:05,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 36%|███▋      | 324/890 [00:37<01:06,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 325/890 [00:37<01:06,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 326/890 [00:38<01:07,  8.37it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 327/890 [00:38<01:06,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 328/890 [00:38<01:06,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 329/890 [00:38<01:05,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 330/890 [00:38<01:04,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 331/890 [00:38<01:05,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 332/890 [00:38<01:05,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 37%|███▋      | 333/890 [00:38<01:05,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 334/890 [00:39<01:05,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 335/890 [00:39<01:05,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 336/890 [00:39<01:04,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 337/890 [00:39<01:04,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 338/890 [00:39<01:04,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 339/890 [00:39<01:04,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 340/890 [00:39<01:04,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 341/890 [00:39<01:04,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 38%|███▊      | 342/890 [00:39<01:03,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 343/890 [00:40<01:03,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▊      | 344/890 [00:40<01:03,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 345/890 [00:40<01:03,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 346/890 [00:40<01:03,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 347/890 [00:40<01:03,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 348/890 [00:40<01:03,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 349/890 [00:40<01:02,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 350/890 [00:40<01:02,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 39%|███▉      | 351/890 [00:41<01:02,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 352/890 [00:41<01:03,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 353/890 [00:41<01:03,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 354/890 [00:41<01:03,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|███▉      | 355/890 [00:41<01:03,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 356/890 [00:41<01:03,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 357/890 [00:41<01:03,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 358/890 [00:41<01:02,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 359/890 [00:41<01:01,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 40%|████      | 360/890 [00:42<01:02,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 361/890 [00:42<01:02,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 362/890 [00:42<01:02,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 363/890 [00:42<01:02,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 364/890 [00:42<01:02,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 365/890 [00:42<01:02,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 366/890 [00:42<01:01,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████      | 367/890 [00:42<01:00,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 368/890 [00:43<01:00,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 41%|████▏     | 369/890 [00:43<01:01,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 370/890 [00:43<01:01,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 371/890 [00:43<01:01,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 372/890 [00:43<01:01,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 373/890 [00:43<01:00,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 374/890 [00:43<01:00,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 375/890 [00:43<01:00,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 376/890 [00:43<00:59,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 377/890 [00:44<00:59,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 42%|████▏     | 378/890 [00:44<00:59,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 379/890 [00:44<00:59,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 380/890 [00:44<00:58,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 381/890 [00:44<00:59,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 382/890 [00:44<00:59,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 383/890 [00:44<00:59,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 384/890 [00:44<00:59,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 385/890 [00:45<00:59,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 386/890 [00:45<00:59,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 43%|████▎     | 387/890 [00:45<00:58,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 388/890 [00:45<00:58,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▎     | 389/890 [00:45<00:58,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 390/890 [00:45<00:58,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 391/890 [00:45<00:58,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 392/890 [00:45<00:58,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 393/890 [00:45<00:57,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 394/890 [00:46<00:57,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 395/890 [00:46<00:57,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 44%|████▍     | 396/890 [00:46<00:57,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 397/890 [00:46<00:58,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 398/890 [00:46<00:56,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 399/890 [00:46<00:57,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▍     | 400/890 [00:46<00:57,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 401/890 [00:46<00:56,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 402/890 [00:47<00:57,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 403/890 [00:47<00:58,  8.36it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 45%|████▌     | 404/890 [00:47<00:55,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 405/890 [00:47<00:56,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 406/890 [00:47<00:55,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 407/890 [00:47<00:56,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 408/890 [00:47<00:56,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 409/890 [00:47<00:56,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 410/890 [00:47<00:57,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▌     | 411/890 [00:48<00:55,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 412/890 [00:48<00:54,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 46%|████▋     | 413/890 [00:48<00:55,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 414/890 [00:48<00:55,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 415/890 [00:48<00:56,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 416/890 [00:48<00:56,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 417/890 [00:48<00:55,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 418/890 [00:48<00:55,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 419/890 [00:48<00:55,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 420/890 [00:49<00:54,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 421/890 [00:49<00:54,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 47%|████▋     | 422/890 [00:49<00:54,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 423/890 [00:49<00:55,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 424/890 [00:49<00:55,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 425/890 [00:49<00:55,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 426/890 [00:49<00:55,  8.36it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 427/890 [00:49<00:54,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 428/890 [00:50<00:54,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 429/890 [00:50<00:54,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 430/890 [00:50<00:54,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 48%|████▊     | 431/890 [00:50<00:53,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 432/890 [00:50<00:53,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▊     | 433/890 [00:50<00:53,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 434/890 [00:50<00:53,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 435/890 [00:50<00:53,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 436/890 [00:50<00:52,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 437/890 [00:51<00:52,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 438/890 [00:51<00:52,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 439/890 [00:51<00:52,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 49%|████▉     | 440/890 [00:51<00:52,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 441/890 [00:51<00:52,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 442/890 [00:51<00:53,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 443/890 [00:51<00:53,  8.37it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|████▉     | 444/890 [00:51<00:52,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 445/890 [00:52<00:51,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 446/890 [00:52<00:51,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 447/890 [00:52<00:51,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 448/890 [00:52<00:51,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 50%|█████     | 449/890 [00:52<00:51,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 450/890 [00:52<00:58,  7.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 452/890 [00:52<00:51,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 453/890 [00:52<00:51,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 454/890 [00:53<00:51,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 455/890 [00:53<00:50,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████     | 456/890 [00:53<00:50,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 457/890 [00:53<00:50,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 51%|█████▏    | 458/890 [00:53<00:50,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 459/890 [00:53<00:51,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 460/890 [00:53<00:51,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 461/890 [00:53<00:51,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 462/890 [00:54<00:50,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 463/890 [00:54<00:50,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 464/890 [00:54<00:49,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 465/890 [00:54<00:48,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 466/890 [00:54<00:49,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 52%|█████▏    | 467/890 [00:54<00:49,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 468/890 [00:54<00:50,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 469/890 [00:54<00:48,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 470/890 [00:54<00:48,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 471/890 [00:55<00:49,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 472/890 [00:55<00:48,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 473/890 [00:55<00:48,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 474/890 [00:55<00:48,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 475/890 [00:55<00:48,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 53%|█████▎    | 476/890 [00:55<00:48,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 477/890 [00:55<00:47,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▎    | 478/890 [00:55<00:48,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 479/890 [00:56<00:48,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 480/890 [00:56<00:48,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 481/890 [00:56<00:48,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 482/890 [00:56<00:48,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 483/890 [00:56<00:47,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 484/890 [00:56<00:46,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 54%|█████▍    | 485/890 [00:56<00:46,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 486/890 [00:56<00:47,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 487/890 [00:56<00:47,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 488/890 [00:57<00:47,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▍    | 489/890 [00:57<00:47,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 490/890 [00:57<00:47,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 491/890 [00:57<00:47,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 492/890 [00:57<00:46,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 55%|█████▌    | 493/890 [00:57<00:45,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 494/890 [00:57<00:45,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 495/890 [00:57<00:46,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 496/890 [00:58<00:46,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 497/890 [00:58<00:46,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 498/890 [00:58<00:45,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 499/890 [00:58<00:45,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▌    | 500/890 [00:58<00:45,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 501/890 [00:58<00:45,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 56%|█████▋    | 502/890 [00:58<00:45,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 503/890 [00:58<00:45,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 504/890 [00:58<00:44,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 505/890 [00:59<00:44,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 506/890 [00:59<00:45,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 507/890 [00:59<00:44,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 508/890 [00:59<00:44,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 509/890 [00:59<00:44,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 510/890 [00:59<00:43,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 57%|█████▋    | 511/890 [00:59<00:43,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 512/890 [00:59<00:43,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 513/890 [01:00<00:43,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 514/890 [01:00<00:43,  8.73it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 515/890 [01:00<00:43,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 516/890 [01:00<00:44,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 517/890 [01:00<00:43,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 518/890 [01:00<00:43,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 519/890 [01:00<00:43,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 58%|█████▊    | 520/890 [01:00<00:42,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 521/890 [01:00<00:42,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▊    | 522/890 [01:01<00:42,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 523/890 [01:01<00:42,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 524/890 [01:01<00:42,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 525/890 [01:01<00:42,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 526/890 [01:01<00:42,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 527/890 [01:01<00:42,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 528/890 [01:01<00:42,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 59%|█████▉    | 529/890 [01:01<00:42,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 530/890 [01:01<00:41,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 531/890 [01:02<00:41,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 532/890 [01:02<00:41,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|█████▉    | 533/890 [01:02<00:41,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 534/890 [01:02<00:41,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 535/890 [01:02<00:41,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 536/890 [01:02<00:41,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 537/890 [01:02<00:40,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 60%|██████    | 538/890 [01:02<00:40,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 539/890 [01:03<00:40,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 540/890 [01:03<00:40,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 541/890 [01:03<00:40,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 542/890 [01:03<00:40,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 543/890 [01:03<00:40,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 544/890 [01:03<00:40,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████    | 545/890 [01:03<00:40,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 546/890 [01:03<00:40,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 61%|██████▏   | 547/890 [01:03<00:40,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 548/890 [01:04<00:39,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 549/890 [01:04<00:39,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 550/890 [01:04<00:39,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 551/890 [01:04<00:39,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 552/890 [01:04<00:39,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 553/890 [01:04<00:38,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 554/890 [01:04<00:38,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 555/890 [01:04<00:39,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 62%|██████▏   | 556/890 [01:05<00:39,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 557/890 [01:05<00:39,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 558/890 [01:05<00:38,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 559/890 [01:05<00:38,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 560/890 [01:05<00:37,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 561/890 [01:05<00:37,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 562/890 [01:05<00:38,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 563/890 [01:05<00:38,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 564/890 [01:05<00:38,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 63%|██████▎   | 565/890 [01:06<00:38,  8.36it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 566/890 [01:06<00:38,  8.36it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▎   | 567/890 [01:06<00:38,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 568/890 [01:06<00:38,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 569/890 [01:06<00:37,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 570/890 [01:06<00:37,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 571/890 [01:06<00:37,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 572/890 [01:06<00:37,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 573/890 [01:07<00:37,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 64%|██████▍   | 574/890 [01:07<00:37,  8.35it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 575/890 [01:07<00:37,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 576/890 [01:07<00:37,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 577/890 [01:07<00:37,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▍   | 578/890 [01:07<00:36,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 579/890 [01:07<00:36,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 580/890 [01:07<00:36,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 581/890 [01:07<00:36,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 65%|██████▌   | 582/890 [01:08<00:36,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 583/890 [01:08<00:36,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 584/890 [01:08<00:36,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 585/890 [01:08<00:36,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 586/890 [01:08<00:36,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 587/890 [01:08<00:35,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 588/890 [01:08<00:35,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▌   | 589/890 [01:08<00:35,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 590/890 [01:09<00:35,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 66%|██████▋   | 591/890 [01:09<00:35,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 592/890 [01:09<00:35,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 593/890 [01:09<00:35,  8.37it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 594/890 [01:09<00:35,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 595/890 [01:09<00:34,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 596/890 [01:09<00:34,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 597/890 [01:09<00:34,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 598/890 [01:09<00:33,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 599/890 [01:10<00:33,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 67%|██████▋   | 600/890 [01:10<00:34,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 601/890 [01:10<00:34,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 602/890 [01:10<00:34,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 603/890 [01:10<00:34,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 604/890 [01:10<00:33,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 605/890 [01:10<00:32,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 606/890 [01:10<00:32,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 607/890 [01:11<00:33,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 608/890 [01:11<00:33,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 68%|██████▊   | 609/890 [01:11<00:33,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 610/890 [01:11<00:33,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▊   | 611/890 [01:11<00:33,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 612/890 [01:11<00:33,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 613/890 [01:11<00:32,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 614/890 [01:11<00:33,  8.30it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 615/890 [01:11<00:31,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 616/890 [01:12<00:31,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 617/890 [01:12<00:31,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 69%|██████▉   | 618/890 [01:12<00:31,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 619/890 [01:12<00:31,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 620/890 [01:12<00:31,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 621/890 [01:12<00:31,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|██████▉   | 622/890 [01:12<00:31,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 623/890 [01:12<00:31,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 624/890 [01:13<00:31,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 625/890 [01:13<00:30,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 626/890 [01:13<00:30,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 70%|███████   | 627/890 [01:13<00:30,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 628/890 [01:13<00:30,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 629/890 [01:13<00:30,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 630/890 [01:13<00:30,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 631/890 [01:13<00:30,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 632/890 [01:13<00:30,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 633/890 [01:14<00:30,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████   | 634/890 [01:14<00:29,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 635/890 [01:14<00:29,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 71%|███████▏  | 636/890 [01:14<00:29,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 637/890 [01:14<00:29,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 638/890 [01:14<00:29,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 639/890 [01:14<00:29,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 640/890 [01:14<00:29,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 641/890 [01:15<00:29,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 642/890 [01:15<00:28,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 643/890 [01:15<00:28,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 644/890 [01:15<00:28,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 72%|███████▏  | 645/890 [01:15<00:28,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 646/890 [01:15<00:28,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 647/890 [01:15<00:28,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 648/890 [01:15<00:28,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 649/890 [01:15<00:29,  8.30it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 650/890 [01:16<00:28,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 651/890 [01:16<00:27,  8.70it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 652/890 [01:16<00:27,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 653/890 [01:16<00:27,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 73%|███████▎  | 654/890 [01:16<00:27,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 655/890 [01:16<00:27,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▎  | 656/890 [01:16<00:27,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 657/890 [01:16<00:27,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 658/890 [01:17<00:27,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 659/890 [01:17<00:26,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 660/890 [01:17<00:26,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 661/890 [01:17<00:26,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 662/890 [01:17<00:26,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 74%|███████▍  | 663/890 [01:17<00:26,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 664/890 [01:17<00:26,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 665/890 [01:17<00:26,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 666/890 [01:17<00:26,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▍  | 667/890 [01:18<00:25,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 668/890 [01:18<00:26,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 669/890 [01:18<00:26,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 670/890 [01:18<00:25,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 75%|███████▌  | 671/890 [01:18<00:25,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 672/890 [01:18<00:25,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 673/890 [01:18<00:25,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 674/890 [01:18<00:25,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 675/890 [01:18<00:25,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 676/890 [01:19<00:25,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 677/890 [01:19<00:25,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▌  | 678/890 [01:19<00:24,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 679/890 [01:19<00:24,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 76%|███████▋  | 680/890 [01:19<00:24,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 681/890 [01:19<00:24,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 682/890 [01:19<00:24,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 683/890 [01:19<00:24,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 684/890 [01:20<00:24,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 685/890 [01:20<00:24,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 686/890 [01:20<00:24,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 687/890 [01:20<00:24,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 688/890 [01:20<00:23,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 77%|███████▋  | 689/890 [01:20<00:23,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 690/890 [01:20<00:23,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 691/890 [01:20<00:23,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 692/890 [01:20<00:23,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 693/890 [01:21<00:23,  8.36it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 694/890 [01:21<00:23,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 695/890 [01:21<00:23,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 696/890 [01:21<00:22,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 697/890 [01:21<00:22,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 78%|███████▊  | 698/890 [01:21<00:22,  8.72it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 699/890 [01:21<00:22,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▊  | 700/890 [01:21<00:22,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 701/890 [01:22<00:22,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 702/890 [01:22<00:22,  8.37it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 703/890 [01:22<00:22,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 704/890 [01:22<00:22,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 705/890 [01:22<00:21,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 706/890 [01:22<00:21,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 79%|███████▉  | 707/890 [01:22<00:21,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 708/890 [01:22<00:21,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 709/890 [01:22<00:21,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 710/890 [01:23<00:21,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|███████▉  | 711/890 [01:23<00:21,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 712/890 [01:23<00:21,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 713/890 [01:23<00:21,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 714/890 [01:23<00:20,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 715/890 [01:23<00:20,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 80%|████████  | 716/890 [01:23<00:20,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 717/890 [01:23<00:19,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 718/890 [01:24<00:20,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 719/890 [01:24<00:20,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 720/890 [01:24<00:20,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 721/890 [01:24<00:20,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 722/890 [01:24<00:19,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████  | 723/890 [01:24<00:19,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 724/890 [01:24<00:19,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 81%|████████▏ | 725/890 [01:24<00:19,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 726/890 [01:24<00:19,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 727/890 [01:25<00:19,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 728/890 [01:25<00:19,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 729/890 [01:25<00:19,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 730/890 [01:25<00:18,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 731/890 [01:25<00:18,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 732/890 [01:25<00:18,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 733/890 [01:25<00:18,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 82%|████████▏ | 734/890 [01:25<00:18,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 735/890 [01:26<00:17,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 736/890 [01:26<00:17,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 737/890 [01:26<00:17,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 738/890 [01:26<00:17,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 739/890 [01:26<00:17,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 740/890 [01:26<00:17,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 741/890 [01:26<00:17,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 742/890 [01:26<00:17,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 83%|████████▎ | 743/890 [01:26<00:16,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 744/890 [01:27<00:16,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▎ | 745/890 [01:27<00:16,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 746/890 [01:27<00:16,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 747/890 [01:27<00:16,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 748/890 [01:27<00:16,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 749/890 [01:27<00:16,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 750/890 [01:27<00:16,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 751/890 [01:27<00:16,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 84%|████████▍ | 752/890 [01:28<00:16,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 753/890 [01:28<00:16,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 754/890 [01:28<00:16,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 755/890 [01:28<00:15,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▍ | 756/890 [01:28<00:15,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 757/890 [01:28<00:15,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 758/890 [01:28<00:15,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 759/890 [01:28<00:15,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 85%|████████▌ | 760/890 [01:28<00:15,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 761/890 [01:29<00:15,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 762/890 [01:29<00:15,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 763/890 [01:29<00:15,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 764/890 [01:29<00:15,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 765/890 [01:29<00:14,  8.35it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 766/890 [01:29<00:14,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▌ | 767/890 [01:29<00:14,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 768/890 [01:29<00:14,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 86%|████████▋ | 769/890 [01:30<00:14,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 770/890 [01:30<00:14,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 771/890 [01:30<00:13,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 772/890 [01:30<00:13,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 773/890 [01:30<00:13,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 774/890 [01:30<00:13,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 775/890 [01:30<00:13,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 776/890 [01:30<00:13,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 777/890 [01:30<00:13,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 87%|████████▋ | 778/890 [01:31<00:13,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 779/890 [01:31<00:12,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 780/890 [01:31<00:12,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 781/890 [01:31<00:12,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 782/890 [01:31<00:12,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 783/890 [01:31<00:12,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 784/890 [01:31<00:12,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 785/890 [01:31<00:12,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 786/890 [01:32<00:12,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 88%|████████▊ | 787/890 [01:32<00:11,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 788/890 [01:32<00:11,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▊ | 789/890 [01:32<00:11,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 790/890 [01:32<00:11,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 791/890 [01:32<00:11,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 792/890 [01:32<00:11,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 793/890 [01:32<00:11,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 794/890 [01:32<00:11,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 795/890 [01:33<00:11,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 89%|████████▉ | 796/890 [01:33<00:11,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 797/890 [01:33<00:11,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 798/890 [01:33<00:10,  8.39it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 799/890 [01:33<00:10,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|████████▉ | 800/890 [01:33<00:10,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 801/890 [01:33<00:10,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 802/890 [01:33<00:10,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 803/890 [01:34<00:10,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 804/890 [01:34<00:10,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 90%|█████████ | 805/890 [01:34<00:10,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 806/890 [01:34<00:09,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 807/890 [01:34<00:09,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 808/890 [01:34<00:09,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 809/890 [01:34<00:09,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 810/890 [01:34<00:09,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 811/890 [01:34<00:09,  8.66it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████ | 812/890 [01:35<00:09,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 813/890 [01:35<00:09,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 91%|█████████▏| 814/890 [01:35<00:09,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 815/890 [01:35<00:08,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 816/890 [01:35<00:08,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 817/890 [01:35<00:08,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 818/890 [01:35<00:08,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 819/890 [01:35<00:08,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 820/890 [01:36<00:08,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 821/890 [01:36<00:08,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 822/890 [01:36<00:07,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 92%|█████████▏| 823/890 [01:36<00:07,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 824/890 [01:36<00:07,  8.59it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 825/890 [01:36<00:07,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 826/890 [01:36<00:07,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 827/890 [01:36<00:07,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 828/890 [01:36<00:07,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 829/890 [01:37<00:07,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 830/890 [01:37<00:06,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 831/890 [01:37<00:06,  8.63it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 93%|█████████▎| 832/890 [01:37<00:06,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 833/890 [01:37<00:06,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▎| 834/890 [01:37<00:06,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 835/890 [01:37<00:06,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 836/890 [01:37<00:06,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 837/890 [01:37<00:06,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 838/890 [01:38<00:06,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 839/890 [01:38<00:06,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 840/890 [01:38<00:05,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 94%|█████████▍| 841/890 [01:38<00:05,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 842/890 [01:38<00:05,  8.51it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 843/890 [01:38<00:05,  8.69it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 844/890 [01:38<00:05,  8.53it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▍| 845/890 [01:38<00:05,  8.49it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 846/890 [01:39<00:05,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 847/890 [01:39<00:05,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 848/890 [01:39<00:04,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 95%|█████████▌| 849/890 [01:39<00:04,  8.44it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 850/890 [01:39<00:04,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 851/890 [01:39<00:04,  8.45it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 852/890 [01:39<00:04,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 853/890 [01:39<00:04,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 854/890 [01:39<00:04,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 855/890 [01:40<00:04,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▌| 856/890 [01:40<00:04,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 857/890 [01:40<00:03,  8.46it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 96%|█████████▋| 858/890 [01:40<00:03,  8.48it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 859/890 [01:40<00:03,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 860/890 [01:40<00:03,  8.67it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 861/890 [01:40<00:03,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 862/890 [01:40<00:03,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 863/890 [01:41<00:03,  8.50it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 864/890 [01:41<00:03,  8.38it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 865/890 [01:41<00:02,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 866/890 [01:41<00:02,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 97%|█████████▋| 867/890 [01:41<00:02,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 868/890 [01:41<00:02,  8.68it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 869/890 [01:41<00:02,  8.61it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 870/890 [01:41<00:02,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 871/890 [01:41<00:02,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 872/890 [01:42<00:02,  8.42it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 873/890 [01:42<00:02,  8.40it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 874/890 [01:42<00:01,  8.41it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 875/890 [01:42<00:01,  8.43it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 98%|█████████▊| 876/890 [01:42<00:01,  8.62it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 877/890 [01:42<00:01,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▊| 878/890 [01:42<00:01,  8.56it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 879/890 [01:42<00:01,  8.71it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 880/890 [01:43<00:01,  8.78it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 881/890 [01:43<00:01,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 882/890 [01:43<00:00,  8.58it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 883/890 [01:43<00:00,  8.52it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 884/890 [01:43<00:00,  8.47it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


 99%|█████████▉| 885/890 [01:43<00:00,  8.55it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 886/890 [01:43<00:00,  8.57it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 887/890 [01:43<00:00,  8.64it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 888/890 [01:43<00:00,  8.65it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|█████████▉| 889/890 [01:44<00:00,  8.60it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])


100%|██████████| 890/890 [01:44<00:00,  8.54it/s]

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])





input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torc

In [None]:
# Run the evaluate helper (defined outside this cell) on the model with df_test.
# NOTE(review): presumably this reports accuracy on the held-out test split —
# confirm against evaluate's definition.
evaluate(model, df_test)

input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torch.Size([2, 512])
mask shape: torch.Size([2, 512])
input_id shape: torc

Epoch를 2에서 5로 늘려 학습한 결과, Accuracy가 0.857에서 0.991로 유의미하게 증가했습니다.