# char-RNN-文本生成
## 教學目標
使用 RNN 弄出一個基本的生成文字模型，幫助初學者上手 RNN

## 適用對象
適用於已經學過 PyTorch 基本語法的人

## 執行方法
在 Jupyter notebook 中，選取想要執行的區塊後，使用以下其中一種方法執行

- 上方工具列中，按下 Cell < Run Cells 執行
- 使用快捷鍵 Shift + Enter 執行

## 大綱
- [載入資料](#載入資料)
- [前處理](#前處理)
- [建立字典](#建立字典)
- [超參數](#超參數)
- [資料分批](#資料分批)
- [模型設計](#模型設計)
- [訓練](#訓練)
- [生成](#生成)

## 檔案來源
- [Kaggle HC 新聞資料集](https://www.kaggle.com/alvations/old-newspapers#old-newspaper.tsv)
- 下載後請放到路徑 `專案資料夾/data/old-newspaper.tsv`

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!pip install opencc

Collecting opencc
  Downloading OpenCC-1.1.7-cp310-cp310-manylinux1_x86_64.whl (779 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/779.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/779.8 kB[0m [31m1.6 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m778.2/779.8 kB[0m [31m11.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.8/779.8 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opencc
Successfully installed opencc-1.1.7


In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn
import torch.nn.utils.rnn
import torch.utils.data
import matplotlib.pyplot as plt
import seaborn as sns
import os
import opencc

data_path = './gdrive/MyDrive/ikm_lab/GAI/data'

# 載入資料
- 請務必先[下載](https://www.kaggle.com/alvations/old-newspapers#old-newspaper.tsv)資料後將資料放置到 `data` 資料夾之下
- `tsv` 檔案類似 `csv`，只是用 `\t` 做分隔符號
- 資料內容包含

|欄位|意義|資料型態|
|-|-|-|
|`Language`|語系|文字（類別）|
|`Source`|新聞來源|文字|
|`Date`|時間|文字|
|`Text`|文字內容|文字|

In [51]:
# df1 = pd.read_csv(os.path.join(data_path + '/old-newspaper.tsv'), sep='\t')
df = pd.read_csv(os.path.join(data_path + '/arithmetic.csv'))
# 看一下前幾筆資料是什麼樣子
df.head()
# df1.head()

Unnamed: 0,src,tgt
0,0+0=,0
1,0-0=,0
2,0*0=,0
3,(0+0)*0=,0
4,0+0*0=,0


# 前處理
- 訓練目標為生成繁體中文字
    - 所以只考量繁體中文的資料
    - 類別為 `Chinese (Traditional)`
    - 共約 333735 筆
- 資料長度不一
    - 畫出長度分佈圖
    - 計算長度四分位數、最小值、最大值
    - 為了方便訓練，只考慮長度介於 60~200 的新聞

In [None]:
# df[df['Language'] == 'Chinese (Traditional)'].shape
df.shape

(2632500, 2)

In [52]:
# 只取前7000筆，因爲原資料量太大了，不方便演示
# df = df[df['Language'] == 'Chinese (Traditional)'].iloc[:7000]
# df['src'].iloc[:]

# df = df.iloc[:26325]
df['src'] = df['src'].astype(str)
df['tgt'] = df['tgt'].astype(str)
df['combined'] = df['src'].str.cat(df['tgt'])
df.head()

Unnamed: 0,src,tgt,combined
0,0+0=,0,0+0=0
1,0-0=,0,0-0=0
2,0*0=,0,0*0=0
3,(0+0)*0=,0,(0+0)*0=0
4,0+0*0=,0,0+0*0=0


In [53]:
df.shape

(2632500, 3)

In [None]:
# 簡單做一下統計
# df['len'] = df['Text'].apply(lambda x: len(str(x)))
# print(df['len'].describe())
# print(df[df['len'] <= 200].shape[0])
# print(df[df['len'] >= 60].shape[0])
# print(df[(df['len'] >= 60) & (df['len'] <= 200)].shape[0])

# 建立字典
- 無法直接利用純文字進行計算
- 將所有文字轉換成數字
- 字典大小約為 `7000`
- 特殊字
    - '&lt;pad&gt;'
        - 每個 batch 所包含的句子長度不同
        - 將長度使用 '&lt;pad&gt;' 補成 batch 中最大值者
    - '&lt;eos&gt;'
        - 指定生成的結尾
        - 沒有 '&lt;eos&gt;' 會不知道何時停止生成

In [54]:
# 一個dict把中文字符轉化成id
char_to_id = {}
# 把id轉回中文字符
id_to_char = {}

# 有一些必須要用的special token先添加進來(一般用來做padding的token的id是0)
char_to_id['<pad>'] = 0
char_to_id['<eos>'] = 1
id_to_char[0] = '<pad>'
id_to_char[1] = '<eos>'


# 把所有資料集中出現的token都記錄到dict中
for char in set(df['combined'].str.cat()):
    ch_id = len(char_to_id)
    char_to_id[char] = ch_id
    id_to_char[ch_id] = char

vocab_size = len(char_to_id)
print('字典大小: {}'.format(vocab_size))

字典大小: 18


In [55]:
print(char_to_id['='])

17


In [56]:
# # 把資料集的所有資料都變成id
df['char_id_list'] = df['combined'].apply(lambda text: [char_to_id[char] for char in list(text)] + [char_to_id['<eos>']])
df['ans_id_list'] = df['tgt'].apply(lambda text: [char_to_id[char] for char in list(text)] + [char_to_id['<eos>']])
df[['src','tgt','combined','char_id_list','ans_id_list']].head()

Unnamed: 0,src,tgt,combined,char_id_list,ans_id_list
0,0+0=,0,0+0=0,"[4, 6, 4, 17, 4, 1]","[4, 1]"
1,0-0=,0,0-0=0,"[4, 13, 4, 17, 4, 1]","[4, 1]"
2,0*0=,0,0*0=0,"[4, 16, 4, 17, 4, 1]","[4, 1]"
3,(0+0)*0=,0,(0+0)*0=0,"[3, 4, 6, 4, 15, 16, 4, 17, 4, 1]","[4, 1]"
4,0+0*0=,0,0+0*0=0,"[4, 6, 4, 16, 4, 17, 4, 1]","[4, 1]"


In [60]:
brackets_df = pd.DataFrame(columns=['src', 'tgt','combined','char_id_list','ans_id_list'])
no_brackets_df = pd.DataFrame(columns=['src', 'tgt','combined','char_id_list','ans_id_list'])

positive_df = pd.DataFrame(columns=['src', 'tgt','combined','char_id_list','ans_id_list'])
negative_df = pd.DataFrame(columns=['src', 'tgt','combined','char_id_list','ans_id_list'])

first_df = df.iloc[:2600]
last_df = df.iloc[2630000:]
for src,tgt,combined,char_id,ans_id in zip(first_df['src'],first_df['tgt'],first_df['combined'],first_df['char_id_list'],first_df['ans_id_list']):
  if '(' in src:
    brackets_df.loc[len(brackets_df)] = [src,tgt,combined,char_id,ans_id]
  else :
    no_brackets_df.loc[len(no_brackets_df)] = [src,tgt,combined,char_id,ans_id]
  if '-' in tgt:
    negative_df.loc[len(negative_df)] = [src,tgt,combined,char_id,ans_id]
  else  :
    positive_df.loc[len(positive_df)] = [src,tgt,combined,char_id,ans_id]
for src,tgt in zip(last_df['src'],last_df['tgt']):
  if '(' in src:
    brackets_df.loc[len(brackets_df)] = [src,tgt,combined,char_id,ans_id]
  else :
    no_brackets_df.loc[len(no_brackets_df)] = [src,tgt,combined,char_id,ans_id]
  if '-' in tgt:
    negative_df.loc[len(negative_df)] = [src,tgt,combined,char_id,ans_id]
  else  :
    positive_df.loc[len(positive_df)] = [src,tgt,combined,char_id,ans_id]


# 超參數

|超參數|意義|數值|
|-|-|-|
|`batch_size`|單一 batch 的資料數|64|
|`epochs`|總共要訓練幾個 epoch|10|
|`embed_dim`|文字的 embedding 維度|50|
|`hidden_dim`|LSTM 中每個時間的 hidden state 維度|50|
|`lr`|Learning Rate|0.001|
|`grad_clip`|為了避免 RNN 出現梯度爆炸問題，將梯度限制範圍|1|

In [58]:
batch_size = 64
epochs = 3
embed_dim = 256
hidden_dim = 256
lr = 0.001
grad_clip = 1

# 資料分批
- 使用 `torch.utils.data.Dataset` 建立資料產生的工具 `dataset`
- 再使用 `torch.utils.data.DataLoader` 對資料集 `dataset` 隨機抽樣並作為一個 batch


In [None]:
# # 這裏的dataset是文本生成的dataset，輸入和輸出的資料都是文章
# # 舉個例子，現在的狀況是：
# # input:  A B C D E F
# # output: B C D E F <eos>
# # 而對於加減法的任務：
# # input:  1 + 2 + 3 = 6
# # output: / / / / / 6 <eos>
# # /的部分都不用算loss，主要是預測=的後面，這裏的答案是6，所以output是6 <eos>
# class Dataset(torch.utils.data.Dataset):
#     def __init__(self, sequences):
#         self.sequences = sequences

#     def __getitem__(self, index):
#         # input:  A B C D E F
#         # output: B C D E F <eos>
#         x = self.sequences.iloc[index][:-1]
#         y = self.sequences.iloc[index][1:]
#         return x, y

#     def __len__(self):
#         return len(self.sequences)

# def collate_fn(batch):
#     batch_x = [torch.tensor(data[0]) for data in batch] # list[torch.tensor]
#     batch_y = [torch.tensor(data[1]) for data in batch] # list[torch.tensor]
#     batch_x_lens = torch.LongTensor([len(x) for x in batch_x])
#     batch_y_lens = torch.LongTensor([len(y) for y in batch_y])

#     # torch.tensor
#     # [[1968, 1891, 3580, ... , 0, 0, 0],
#     #  [1014, 2242, 2247, ... , 0, 0, 0],
#     #  [3032,  522, 1485, ... , 0, 0, 0]]
#     #                       padding↑
#     pad_batch_x = torch.nn.utils.rnn.pad_sequence(batch_x,
#                                                   batch_first=True, # shape=(batch_size, seq_len)
#                                                   padding_value=char_to_id['<pad>'])

#     pad_batch_y = torch.nn.utils.rnn.pad_sequence(batch_y,
#                                                   batch_first=True, # shape=(batch_size, seq_len)
#                                                   padding_value=char_to_id['<pad>'])

#     return pad_batch_x, pad_batch_y, batch_x_lens, batch_y_lens


In [59]:
# 這裏的dataset是文本生成的dataset，輸入和輸出的資料都是文章
# 舉個例子，現在的狀況是：
# input:  A B C D E F
# output: B C D E F <eos>
# 而對於加減法的任務：
# input:  1 + 2 + 3 = 6
# output: / / / / / 6 <eos>
# /的部分都不用算loss，主要是預測=的後面，這裏的答案是6，所以output是6 <eos>
class Dataset(torch.utils.data.Dataset):
    def __init__(self, equation, answer):
        self.equation = equation
        self.answer = answer

    def __getitem__(self, index):
        # input:  A B C D E F
        # output: B C D E F <eos>
        x = self.equation.iloc[index][:]
        y = self.answer.iloc[index][:]
        return x, y

    def __len__(self):
        return len(self.equation)

def collate_fn(batch):
    batch_x = [torch.tensor(data[0]) for data in batch] # list[torch.tensor]
    batch_y = [torch.tensor(data[1]) for data in batch] # list[torch.tensor]
    batch_x_lens = torch.LongTensor([len(x) for x in batch_x])
    batch_y_lens = torch.LongTensor([len(y) for y in batch_y])

    # torch.tensor
    # [[1968, 1891, 3580, ... , 0, 0, 0],
    #  [1014, 2242, 2247, ... , 0, 0, 0],
    #  [3032,  522, 1485, ... , 0, 0, 0]]
    #                       padding↑
    pad_batch_x = torch.nn.utils.rnn.pad_sequence(batch_x,
                            batch_first=True, # shape=(batch_size, seq_len)
                            padding_value=char_to_id['<pad>'])

    pad_batch_y = torch.nn.utils.rnn.pad_sequence(batch_y,
                            batch_first=True, # shape=(batch_size, seq_len)
                            padding_value=char_to_id['<pad>'])

    return pad_batch_x, pad_batch_y, batch_x_lens, batch_y_lens


In [62]:
# dataset = Dataset(df['char_id_list'])
math_dataset = Dataset(df['char_id_list'].iloc[26300:27300],df['ans_id_list'].iloc[26300:27300])
validation_dataset = Dataset(df['char_id_list'].iloc[26364:26428],df['ans_id_list'].iloc[26364:26428])
bracket_dataset = Dataset(brackets_df['char_id_list'],brackets_df['ans_id_list'])
no_brackets_dataset = Dataset(no_brackets_df['char_id_list'],no_brackets_df['ans_id_list'])
positive_dataset = Dataset(positive_df['char_id_list'],positive_df['ans_id_list'])
negative_dataset = Dataset(negative_df['char_id_list'],negative_df['ans_id_list'])

In [63]:
train_data_loader = torch.utils.data.DataLoader(math_dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      collate_fn=collate_fn)
train_brackets_data_loader = torch.utils.data.DataLoader(bracket_dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      collate_fn=collate_fn)
train_no_brackets_data_loader = torch.utils.data.DataLoader(no_brackets_dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      collate_fn=collate_fn)
train_positive_data_loader = torch.utils.data.DataLoader(positive_dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      collate_fn=collate_fn)
train_negative_data_loader = torch.utils.data.DataLoader(negative_dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      collate_fn=collate_fn)
validation_data_loader = torch.utils.data.DataLoader(validation_dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      collate_fn=collate_fn)

# 模型設計

## 執行順序
1. 將句子中的所有字轉換成 embedding
2. 按照句子順序將 embedding 丟入 LSTM
3. LSTM 的輸出再丟給 LSTM，可以接上更多層
4. 最後的 LSTM 所有時間點的輸出丟進一層 Fully Connected
5. 輸出結果所有維度中的最大者即為下一個字

## 損失函數
因為是類別預測，所以使用 Cross Entropy

## 梯度更新
使用 Adam 演算法進行梯度更新

In [44]:
class CharRNN(torch.nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super(CharRNN, self).__init__()

        # Embedding層
        self.embedding = torch.nn.Embedding(num_embeddings=vocab_size,
                                            embedding_dim=embed_dim,
                                            padding_idx=char_to_id['<pad>'])

        # RNN層
        self.rnn_layer1 = torch.nn.LSTM(input_size=embed_dim,
                                        hidden_size=hidden_dim,
                                        batch_first=True)

        self.rnn_layer2 = torch.nn.LSTM(input_size=hidden_dim,
                                        hidden_size=hidden_dim,
                                        batch_first=True)

        # output層
        self.linear = torch.nn.Sequential(torch.nn.Linear(in_features=hidden_dim,
                                                          out_features=hidden_dim),
                                          torch.nn.ReLU(),
                                          torch.nn.Linear(in_features=hidden_dim,
                                                          out_features=vocab_size))

    def forward(self, batch_x, batch_x_lens):
        return self.encoder(batch_x, batch_x_lens)

    def encoder(self, batch_x, batch_x_lens):
        batch_x = self.embedding(batch_x)

        # 假設有個tensor : tensor([[1, 2, 3, 4],
        #                        [9, 0, 0, 0]])
        # 輸出就是：PackedSequence(data=tensor([1, 9, 2, 3, 4]),
        #                         batch_sizes=tensor([2, 1, 1, 1]),
        #                         sorted_indices=None, unsorted_indices=None)
        # torch.nn.utils.rnn.pack_padded_sequence 會把batch當中的句子從長到短排序，建立如上所示的資料結構
        # 就像上一個例子一樣，RNN會先吃第一個batch內的第一個batch_size，看到這個地方的batch_size爲2，所以此時RNN會吃兩個token，輸出一個2Xhidden_dim的向量組
        # 然後看第二個batch_size, 此時爲1，少了一個，說明其中一個序列到頭了，那就取上一個輸出向量的第一個，再生成一個1Xhidden_dim的向量
        # [
        # [1,2,3],                data = [1,4,6,2,5,3]   output  data = [1p,4p,6p,2p,5p,3p]             [1p,2p,3p]
        # [4,5,0], => pack_padded_sequence => batch_sizes = [3,2,1] => RNN => batch_sizes = [3,2,1] => pad_packed_sequence => [4p,5p,0]
        # [6,0,0]         _                                                     [6p,0,0]
        # ]
        batch_x = torch.nn.utils.rnn.pack_padded_sequence(batch_x,
                                                          batch_x_lens,
                                                          batch_first=True,
                                                          enforce_sorted=False)

        batch_x, _ = self.rnn_layer1(batch_x)
        batch_x, _ = self.rnn_layer2(batch_x)

        batch_x, _ = torch.nn.utils.rnn.pad_packed_sequence(batch_x,
                                                            batch_first=True)

        batch_x = self.linear(batch_x)

        return batch_x

    def generator(self, start_char, max_len=200):

        char_list = [char_to_id[start_char]]

        next_char = None

        # 生成的長度沒達到max_len就一直生
        while len(char_list) < max_len:
            x = torch.LongTensor(char_list).unsqueeze(0)
            x = self.embedding(x)
            _, (ht, _) = self.rnn_layer1(x)
            _, (ht, _) = self.rnn_layer2(ht)
            y = self.linear(ht)

            next_char = np.argmax(y.numpy())
            # 如果看到新的token是<eos>就說明生成結束了，就停下
            if next_char == char_to_id['<eos>']:
                break

            char_list.append(next_char)

        return [id_to_char[ch_id] for ch_id in char_list]

In [64]:
torch.manual_seed(2)
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

model = CharRNN(vocab_size,
                embed_dim,
                hidden_dim)

In [65]:
criterion = torch.nn.CrossEntropyLoss(ignore_index=char_to_id['<pad>'], reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# 訓練
1. 最外層的 `for` 迴圈控制 `epoch`
    1. 內層的 `for` 迴圈透過 `data_loader` 取得 batch
        1. 丟給 `model` 進行訓練
        2. 預測結果 `batch_pred_y` 跟真正的答案 `batch_y` 進行 Cross Entropy 得到誤差 `loss`
        3. 使用 `loss.backward` 自動計算梯度
        4. 使用 `torch.nn.utils.clip_grad_value_` 將梯度限制在 `-grad_clip` &lt; &lt; `grad_clip` 之間
        5. 使用 `optimizer.step()` 進行更新（back propagation）
2. 每 `1000` 個 batch 就輸出一次當前的 loss 觀察是否有收斂的趨勢

In [74]:
from tqdm import tqdm
model = model.to(device)
model.train()
i = 0
total_loss = 0.0
num_batches = 0
find = False
for epoch in range(1, epochs+1):
    process_bar = tqdm(train_brackets_data_loader, desc=f"Training epoch {epoch}")
    for batch_x, batch_y, batch_x_lens, batch_y_lens in process_bar:

        # 標準DL訓練幾板斧
        optimizer.zero_grad()
        batch_pred_y = model(batch_x.to(device), batch_x_lens)

        # 找batch_x的等號位置
        equ_pos = []
        # print()
        for tensor in batch_x:
          for idx, value in enumerate(tensor):
            if value.item() == char_to_id['=']:
                equ_pos.append(idx)
                # print(tensor)
                # print(idx)
        # 看tensor_pred_y和batch_y
        tensor_y = torch.zeros_like(batch_y)
        for i in range(0,batch_y_lens.size(0)):
          idx = equ_pos[i]+1
          for j in range(0,batch_y.size(1)):
            tensor_y[i][j]=torch.argmax(batch_pred_y[i][idx+j],0)
        for tensor,batch in zip(tensor_y,batch_y):
          # print(tensor,batch)
          print('pred_y:',end='')
          for val in tensor:
            print(id_to_char[val.item()],end=" ")
          print()
          print('batch_y:',end='')
          for val in batch:
            print(id_to_char[val.item()],end=" ")
          print()
        tensor_pred_y = torch.empty(batch_y.size(0),batch_y.size(1),18)
        # print(f"batch_y size:{batch_y.size()}")
        # print(f"batch_pred_y size:{batch_pred_y.size()}")
        for i in range(0,batch_y.size(0)):
          equ = equ_pos[i]+1
          for j in range(0,batch_y.size(1)):
            for k in range(0,18):
              # print(batch_pred_y[i][equ+j][k])
              tensor_pred_y[i][j][k] = batch_pred_y[i][equ+j][k]
        # batch_pred_y = batch_pred_y.view(-1, vocab_size)
        tensor_pred_y = tensor_pred_y.view(-1,vocab_size)
        # print(tensor_pred_y.size())
        # print(tensor_pred_y)
        # print(batch_pred_y.size())
        batch_y = batch_y.view(-1).to(device)

        loss = criterion(tensor_pred_y,batch_y)
        loss.backward()
        torch.nn.utils.clip_grad_value_(model.parameters(), grad_clip)
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
        i+=1
        if i%10==0:
          process_bar.set_postfix(loss=loss.item())
    average_loss = total_loss / num_batches
    print(f"Training Loss: {average_loss}")
    # 麻煩各位同學加上 validation 的部分
    model.eval()  # 将模型设置为评估模式
    total_loss = 0.0
    num_batches = 0
    validation_process_bar = tqdm(validation_data_loader,desc="Validation")
    for batch_x, batch_y, batch_x_lens, batch_y_lens in validation_process_bar:
        optimizer.zero_grad()
        batch_pred_y = model(batch_x.to(device), batch_x_lens)

        # 找等号位置
        equ_pos = []
        for tensor in batch_x:
            for idx, value in enumerate(tensor):
                if value.item() == char_to_id['=']:
                    equ_pos.append(idx)

        tensor_pred_y = torch.empty(batch_y.size(0), batch_y.size(1), 18)
        for i in range(0, batch_y.size(0)):
            equ = equ_pos[i]+1
            for j in range(0, batch_y.size(1)):
                for k in range(0, 18):
                    tensor_pred_y[i][j][k] = batch_pred_y[i][equ + j][k]

        tensor_pred_y = tensor_pred_y.view(-1, vocab_size)
        batch_y = batch_y.view(-1).to(device)

        loss = criterion(tensor_pred_y, batch_y)
        total_loss += loss.item()
        num_batches += 1
    average_loss = total_loss / num_batches
    print(f"Validation Loss: {average_loss}")
    model.train()
    # for ... in validation_process_bar:
    #     pred = model...


Training epoch 1:   0%|          | 0/46 [00:00<?, ?it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 

Training epoch 1:   2%|▏         | 1/46 [00:00<00:38,  1.16it/s]

pred_y:2 7 <eos> 2 
batch_y:2 7 <eos> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 <eos> 2 2 
batch_y:3 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y

Training epoch 1:   4%|▍         | 2/46 [00:01<00:37,  1.16it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 1 <eos> 
batch_y:- 2 1 <eos> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 7 <eos> 2 
batch_y:2 7 <eos> <pad> 
pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <

Training epoch 1:   7%|▋         | 3/46 [00:02<00:37,  1.15it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:7 <eos> 2 2 
batch_y:7 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 7 <eos> 2 
batch_y:4 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 <eos> 2 2 
batch_y:3 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:

Training epoch 1:   9%|▊         | 4/46 [00:03<00:36,  1.15it/s]

pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 9 <eos> 2 
batch_y:4 9 <eos> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:2 <e

Training epoch 1:  11%|█         | 5/46 [00:04<00:35,  1.15it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 9 <eos> 2 
batch_y:4 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:- 4 9 <eos> 
batch_y:- 4 9 <eos> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 

Training epoch 1:  13%|█▎        | 6/46 [00:05<00:34,  1.15it/s]

pred_y:- 2 1 <eos> 
batch_y:- 2 1 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 8 <eos> 2 
batch_y:4 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:- 3 

Training epoch 1:  15%|█▌        | 7/46 [00:06<00:34,  1.14it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:

Training epoch 1:  17%|█▋        | 8/46 [00:06<00:33,  1.15it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:1 9 <eos> 2 
batch_y:1 9 <eos> <pad> 
pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:3 9 <eos> 2 
batch_y:3 9 <eos> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad>

Training epoch 1:  20%|█▉        | 9/46 [00:07<00:32,  1.15it/s]

pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:1 9 <eos> 2 
batch_y:1 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos>

Training epoch 1:  22%|██▏       | 10/46 [00:08<00:31,  1.16it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 9 <eos> 2 
batch_y:1 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <p

Training epoch 1:  24%|██▍       | 11/46 [00:09<00:30,  1.16it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:7 <eos> 2 2 
batch_y:7 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:

Training epoch 1:  26%|██▌       | 12/46 [00:10<00:32,  1.06it/s]

pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
bat

Training epoch 1:  28%|██▊       | 13/46 [00:11<00:33,  1.00s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pre

Training epoch 1:  30%|███       | 14/46 [00:12<00:33,  1.05s/it]

pred_y:3 1 <eos> 2 
batch_y:3 1 <eos> <pad> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 9 <eos> 2 
batch_y:3 9 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 3 <eos> 2 
batch_y:1 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:

Training epoch 1:  33%|███▎      | 15/46 [00:13<00:31,  1.02s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:7 <eos> 2 2 
batch_y:7 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <p

Training epoch 1:  35%|███▍      | 16/46 [00:14<00:29,  1.03it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 9 <eos> 2 
batch_y:2 9 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 9 <eos> 2 
batch_y:1 9 <eos> <pad> 
pred_y:4 4 <eos> 2 
batch_y:4 4 <eos> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 6 <eos> 2 
batch_y:4 6 <eos> <pad> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 

Training epoch 1:  37%|███▋      | 17/46 [00:15<00:27,  1.05it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 

Training epoch 1:  39%|███▉      | 18/46 [00:16<00:26,  1.07it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:2 9 <eos> 2 
batch_y:2 9 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 <eos> 2 2 
batch_y:3 <eos> <pad> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:3 6 <eos> 2 
batch_y

Training epoch 1:  41%|████▏     | 19/46 [00:17<00:24,  1.09it/s]

pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:8 <eos> 2 2 
batch_y:8 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:- 3 9 <e

Training epoch 1:  43%|████▎     | 20/46 [00:18<00:23,  1.10it/s]

pred_y:2 1 <eos> 2 
batch_y:2 1 <eos> <pad> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pre

Training epoch 1:  46%|████▌     | 21/46 [00:19<00:22,  1.11it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 7 <eos> 
batch_y:- 3 7 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 1 <eos> 
batch_y:- 2 1 <eos> 
pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pa

Training epoch 1:  48%|████▊     | 22/46 [00:20<00:21,  1.13it/s]

pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 7 <eos> 2 
batch_y:4 7 <eos> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 

Training epoch 1:  50%|█████     | 23/46 [00:20<00:20,  1.13it/s]

pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 6 <eos> 2 
batch_y:4 6 <eos> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 1:  52%|█████▏    | 24/46 [00:21<00:19,  1.13it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 7 <eos> 
batch_y:- 3 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 1:  54%|█████▍    | 25/46 [00:22<00:18,  1.14it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:7 <eos> 2 2 
batch_y:7 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pre

Training epoch 1:  57%|█████▋    | 26/46 [00:23<00:17,  1.12it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 5 <eos> 
batch_y:- 4 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 1 <eos> 2 
batch_y:3 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <p

Training epoch 1:  59%|█████▊    | 27/46 [00:24<00:18,  1.04it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:2 9 <eos> 2 
batch_y:2 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 3 <eos> 2 
batch_y:1 3 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:

Training epoch 1:  61%|██████    | 28/46 [00:25<00:18,  1.02s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 2 <eos> 
batch_y:- 3 2 <eos> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 

Training epoch 1:  63%|██████▎   | 29/46 [00:27<00:18,  1.07s/it]

pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 7 <eos> 2 
batch_y:1 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 4 <eos> 2 
batch_y:4 4 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:2 1 <eos> 2 
batch_y:2 1 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 6 <eos> 
batch_y:- 2 6 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 

Training epoch 1:  65%|██████▌   | 30/46 [00:27<00:16,  1.02s/it]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 7 <eos> 
batch_y:- 3 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:- 3 2 <eos> 
batch_y:- 3 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 

Training epoch 1:  67%|██████▋   | 31/46 [00:28<00:14,  1.03it/s]

pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:8 <eos> 2 2 
batch_y:8 <eos> <pad> <pad> 
pred_y:8 <eos> 2 2 
batch_y:8 <eos> <pad> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:4 4 <eos> 2 
batch_y:4 4 <eos> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 9 <eos> 2 
batch_y:3 9 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <

Training epoch 1:  70%|██████▉   | 32/46 [00:29<00:13,  1.05it/s]

pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <p

Training epoch 1:  72%|███████▏  | 33/46 [00:30<00:12,  1.07it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:- 2 6 <eos> 
batch_y:- 2 6 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 7 <eos> 2 
batch_y:1 7 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:4 8 <eos> 2 
batch_y:4 8 <eos> <pad> 
pred_y:- 4 5 <eos> 
batch_y:- 4 5 <eos> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 1:  74%|███████▍  | 34/46 [00:31<00:10,  1.10it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 9 <eos> 
batch_y:- 1 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <e

Training epoch 1:  76%|███████▌  | 35/46 [00:32<00:09,  1.11it/s]

pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 5 <eos> 2 
batch_y:4 5 <eos> <pad> 
pred_y:0 <e

Training epoch 1:  78%|███████▊  | 36/46 [00:33<00:08,  1.12it/s]

pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 9 <eos> 
batch_y:- 4 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:2 <eos> 

Training epoch 1:  80%|████████  | 37/46 [00:34<00:08,  1.12it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 1 <eos> 2 
batch_y:3 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <p

Training epoch 1:  83%|████████▎ | 38/46 [00:35<00:07,  1.12it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:- 4 5 <eos> 
batch_y

Training epoch 1:  85%|████████▍ | 39/46 [00:35<00:06,  1.13it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 7 

Training epoch 1:  87%|████████▋ | 40/46 [00:36<00:05,  1.13it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:- 3 7 <eos> 
batch_y:- 3 7 <eos> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
bat

Training epoch 1:  89%|████████▉ | 41/46 [00:37<00:04,  1.07it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 <eos> 2 2 
batch_y:3 <eos> <pad> <pad> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 6 <eos> 
batch_y:- 2 6 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 

Training epoch 1:  91%|█████████▏| 42/46 [00:38<00:04,  1.00s/it]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 3 <eos> 
batch_y:- 3 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 9 <eos> 2 
batch_y:4 9 <eos> <pad> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:

Training epoch 1:  93%|█████████▎| 43/46 [00:40<00:03,  1.04s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 2 <eos> 
batch_y:- 3 2 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:- 2 1 <eos> 

Training epoch 1:  96%|█████████▌| 44/46 [00:41<00:02,  1.08s/it]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 7 <eos> 2 
batch_y:4 7 <eos> <pad> 
pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eo

Training epoch 1:  98%|█████████▊| 45/46 [00:42<00:01,  1.02s/it]

pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 9 <eos> 
batch_y:- 1 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 8 <eos> 2 
batch_y:4 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 5 <eos> 
batch_y:- 3 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 

Training epoch 1: 100%|██████████| 46/46 [00:42<00:00,  1.08it/s]


Training Loss: 0.0004123184142214165


Validation: 100%|██████████| 1/1 [00:00<00:00,  4.51it/s]


Validation Loss: 0.19861754775047302


Training epoch 2:   0%|          | 0/46 [00:00<?, ?it/s]

pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 

Training epoch 2:   2%|▏         | 1/46 [00:00<00:37,  1.18it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 5 <eos> 
batch_y:- 3 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <e

Training epoch 2:   4%|▍         | 2/46 [00:01<00:38,  1.15it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 4 <eos> 2 
batch_y:4 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:

Training epoch 2:   7%|▋         | 3/46 [00:02<00:38,  1.13it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <e

Training epoch 2:   9%|▊         | 4/46 [00:03<00:37,  1.13it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:3 <eos> 2 2 
batch_y:3 <eos> <pad> <pad> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad>

Training epoch 2:  11%|█         | 5/46 [00:04<00:36,  1.14it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:8 <eos> 2 2 
batch_y:8 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 5 <eos> 2 
batch_y:2 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad

Training epoch 2:  13%|█▎        | 6/46 [00:05<00:34,  1.15it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:4 6 <eos> 2 
batch_y:4 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <p

Training epoch 2:  15%|█▌        | 7/46 [00:06<00:34,  1.14it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 7 

Training epoch 2:  17%|█▋        | 8/46 [00:07<00:33,  1.14it/s]

pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 3 <eos> 2 
batch_y:3 3 <eos> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 9 <eos> 
batch_y:- 4 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 2:  20%|█▉        | 9/46 [00:07<00:32,  1.14it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 7 <eos> 2 
batch_y:4 7 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 3 <eos> 
batch_y:- 3 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:

Training epoch 2:  22%|██▏       | 10/46 [00:09<00:34,  1.05it/s]

pred_y:- 4 5 <eos> 
batch_y:- 4 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:- 3 5 <eos> 
batch_y:- 3 5 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:2 <eos> 

Training epoch 2:  24%|██▍       | 11/46 [00:10<00:35,  1.03s/it]

pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:- 3 2 <eos> 
batch_y:- 3 2 <eos> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 <eos> 2 2 
batch_y:3 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:

Training epoch 2:  26%|██▌       | 12/46 [00:11<00:36,  1.08s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:2 7 <eos> 2 
batch_y:2 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 9 <eos> 
batch_y:- 1 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pre

Training epoch 2:  28%|██▊       | 13/46 [00:12<00:35,  1.07s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 1 <eos> 2 
batch_y:2 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:

Training epoch 2:  30%|███       | 14/46 [00:13<00:32,  1.02s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 9 <eos> 2 
batch_y:1 9 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> 

Training epoch 2:  33%|███▎      | 15/46 [00:14<00:30,  1.02it/s]

pred_y:7 <eos> 2 2 
batch_y:7 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 3 <eos> 2 
batch_y:1 3 <eos> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <e

Training epoch 2:  35%|███▍      | 16/46 [00:15<00:28,  1.05it/s]

pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:- 5 0 <eos> 
batch_y:- 5 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 6 <eos> 2 
batch_y:4 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad

Training epoch 2:  37%|███▋      | 17/46 [00:16<00:26,  1.08it/s]

pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:2 5 <eos> 2 
batch_y:2 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:8 <e

Training epoch 2:  39%|███▉      | 18/46 [00:16<00:25,  1.09it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 1 <eos> 2 
batch_y:4 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 7 <eos> 
batch_y:- 3 7 <eos> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad>

Training epoch 2:  41%|████▏     | 19/46 [00:17<00:24,  1.12it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 8 <eos> 2 
batch_y:4 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 5 <eos> 
batch_y:- 3 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 

Training epoch 2:  43%|████▎     | 20/46 [00:18<00:23,  1.12it/s]

pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 7 <eos> 2 
batch_y:2 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 9 <eos> 2 
batch_y:3 9 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:7 <eos> 2 2 
batch_y

Training epoch 2:  46%|████▌     | 21/46 [00:19<00:22,  1.12it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:4 8 <eos> 2 
batch_y:4 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 1 <eos> 2 
batch_y:2 1 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad

Training epoch 2:  48%|████▊     | 22/46 [00:20<00:21,  1.12it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 3 <eos> 2 
batch_y:1 3 <eos> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 8 <eos> 

Training epoch 2:  50%|█████     | 23/46 [00:21<00:20,  1.13it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 3 <eos> 2 
batch_y:1 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 

Training epoch 2:  52%|█████▏    | 24/46 [00:22<00:19,  1.13it/s]

pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 7 <eos> 2 
batch_y:4 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 2 <eos> 
batch_y:- 3 2 <eos> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:

Training epoch 2:  54%|█████▍    | 25/46 [00:23<00:20,  1.04it/s]

pred_y:3 1 <eos> 2 
batch_y:3 1 <eos> <pad> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pa

Training epoch 2:  57%|█████▋    | 26/46 [00:24<00:20,  1.03s/it]

pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 3 <eos> 2 
batch_y:2 3 <eos> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:2 <e

Training epoch 2:  59%|█████▊    | 27/46 [00:25<00:20,  1.07s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 7 <eos> 2 
batch_y:4 7 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 7 <eos> 2 
batch_y:1 7 <eos> <pad> 
pred_y:- 3 3 <eos> 
batch_y:- 3 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 1 <eos> 
batch_y:- 2 1 <eos> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:- 2 4 <eos> 
bat

Training epoch 2:  61%|██████    | 28/46 [00:26<00:18,  1.04s/it]

pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 7 <eos> 2 
batch_y:2 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:1 3 <eos> 2 

Training epoch 2:  63%|██████▎   | 29/46 [00:27<00:16,  1.00it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 6 <eos> 
batch_y:- 2 6 <eos> 
pred_y:2 1 <eos> 2 
batch_y:2 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:7 <eos> 2 2 
batch_y:7 <eos> <pad> <pad> 
pred_y:- 3 3 <eos> 
batch_y:- 3 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 2:  65%|██████▌   | 30/46 [00:28<00:15,  1.04it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:3 4 <eos

Training epoch 2:  67%|██████▋   | 31/46 [00:29<00:14,  1.06it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:2 <eos> 

Training epoch 2:  70%|██████▉   | 32/46 [00:30<00:12,  1.08it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:- 4 5 <eos> 
batch_y:- 4 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <

Training epoch 2:  72%|███████▏  | 33/46 [00:31<00:11,  1.09it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 9 <eos> 2 
batch_y:3 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 6 <eos> 
batch_y:- 2 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 1 <eos> 
batch_y:- 2 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 

Training epoch 2:  74%|███████▍  | 34/46 [00:31<00:10,  1.10it/s]

pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 

Training epoch 2:  76%|███████▌  | 35/46 [00:32<00:09,  1.10it/s]

pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:8 <eos> 2 2 
batch_y:8 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pre

Training epoch 2:  78%|███████▊  | 36/46 [00:33<00:09,  1.11it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 5 <eos> 2 
batch_y:4 5 <eos> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:

Training epoch 2:  80%|████████  | 37/46 [00:34<00:08,  1.11it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 

Training epoch 2:  83%|████████▎ | 38/46 [00:35<00:07,  1.11it/s]

pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:8 <eos> 2 2 
batch_y:8 <eos> <pad> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:- 3 3 <eos> 
batch_y:- 3 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 9 <eos> 2 
batch_y:4 9 <eos> <pad> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:8 <eos> 2 2 
batch_y:8 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y

Training epoch 2:  85%|████████▍ | 39/46 [00:36<00:06,  1.06it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 4 <eos> 2 
batch_y:4 4 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 7 <eos> 
batch_y:- 3 7 <eos> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 5 <eos> 
batch_y:- 3 5 <eos> 
pred_y:0 <eos> 2 2 
batch_y

Training epoch 2:  87%|████████▋ | 40/46 [00:37<00:06,  1.03s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:1 3 <eos> 2 
batch_y:1 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 2:  89%|████████▉ | 41/46 [00:39<00:05,  1.08s/it]

pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 2 <eos> 
batch_y:- 3 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:7 <eos> 2 2 
batch_y:7 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:

Training epoch 2:  91%|█████████▏| 42/46 [00:40<00:04,  1.10s/it]

pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 3 <eos> 2 
batch_y:2 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 3 <eos> 
batch_y:- 3 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 

Training epoch 2:  93%|█████████▎| 43/46 [00:41<00:03,  1.05s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 5 <eos> 
batch_y:- 4 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 1 <eos> 2 
batch_y:4 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 3 <eos> 2 
batch_y:3 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
bat

Training epoch 2:  96%|█████████▌| 44/46 [00:42<00:02,  1.01s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 5 <eos> 2 
batch_y:2 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:1 7 <eos> 2 
batch_y:1 7 <eos> <pad> 
pre

Training epoch 2:  98%|█████████▊| 45/46 [00:43<00:01,  1.01s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 9 <eos> 2 
batch_y:2 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 5 <eos> 
batch_y:- 3 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 

Training epoch 2: 100%|██████████| 46/46 [00:43<00:00,  1.06it/s]


Training Loss: 0.0045507489025538314


Validation: 100%|██████████| 1/1 [00:00<00:00,  4.30it/s]


Validation Loss: 0.18998761475086212


Training epoch 3:   0%|          | 0/46 [00:00<?, ?it/s]

pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:2 1 <eos> 2 
batch_y:2 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:

Training epoch 3:   2%|▏         | 1/46 [00:00<00:40,  1.12it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 1 <eos> 
batch_y:- 2 1 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:

Training epoch 3:   4%|▍         | 2/46 [00:01<00:39,  1.12it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 

Training epoch 3:   7%|▋         | 3/46 [00:02<00:38,  1.13it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 2 <eos> 
batch_y:- 3 2 <eos> 
pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <e

Training epoch 3:   9%|▊         | 4/46 [00:03<00:36,  1.14it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:1 7 <eos> 2 
batch_y:1 7 <eos> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y

Training epoch 3:  11%|█         | 5/46 [00:04<00:36,  1.13it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 6 <eos> 2 
batch_y:4 6 <eos> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 2 <eos> 
batch_y:- 3 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 9 <eos> 2 
batch_y:1 9 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pre

Training epoch 3:  13%|█▎        | 6/46 [00:05<00:35,  1.12it/s]

pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 6 <eos> 2 
batch_y:4 6 <eos> <pad> 
pred_y:4 4 <eos> 2 
batch_y:4 4 <eos> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 7 <eos> 2 
batch_y:4 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 3:  15%|█▌        | 7/46 [00:06<00:34,  1.13it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 5 <eos> 
batch_y:- 4 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pre

Training epoch 3:  17%|█▋        | 8/46 [00:07<00:36,  1.03it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 1 <eos> 
batch_y:- 2 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:0 <eos> 2 2 

Training epoch 3:  20%|█▉        | 9/46 [00:09<00:49,  1.35s/it]

pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 1 <eos> 2 
batch_y:4 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 7 <eos> 
batch_y:- 3 7 <eos> 
pred_y:- 3 2 <eos> 
batch_y:- 3 2 <eos> 
pred_y:3 <eos> 2 2 
bat

Training epoch 3:  22%|██▏       | 10/46 [00:11<00:51,  1.42s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:- 1 2 <eos> 
batch_y:- 1 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pre

Training epoch 3:  24%|██▍       | 11/46 [00:12<00:44,  1.28s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 1 <eos> 2 
batch_y:3 1 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 

Training epoch 3:  26%|██▌       | 12/46 [00:12<00:39,  1.16s/it]

pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 5 <eos> 
batch_y:- 3 5 <eos> 
pred_y:4 4 <eos> 2 
batch_y:4 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 5 <eos> 
batch_y:- 3 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:- 1 9 <eos> 
batch_y:- 1 9 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 

Training epoch 3:  28%|██▊       | 13/46 [00:13<00:35,  1.08s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 2 <eos> 
batch_y:- 4 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <e

Training epoch 3:  30%|███       | 14/46 [00:14<00:32,  1.02s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 5 <eos> 
batch_y:- 3 5 <eos> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 4 <eos> 2 
batch_y:4 4 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 

Training epoch 3:  33%|███▎      | 15/46 [00:15<00:30,  1.03it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 9 <eos> 
batch_y:- 1 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:4 3 <eos> 2 
batch_y:4 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:- 3 4 <e

Training epoch 3:  35%|███▍      | 16/46 [00:16<00:28,  1.06it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:3 9 <eos> 2 
batch_y:3 9 <eos> <pad> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:1 8 <eos> 2 
batch_y:1 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 4 <eos> 2 

Training epoch 3:  37%|███▋      | 17/46 [00:17<00:26,  1.08it/s]

pred_y:- 2 1 <eos> 
batch_y:- 2 1 <eos> 
pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 9 <eos> 
batch_y:- 1 9 <eos> 
pred_y:2 <eos> 2 2 

Training epoch 3:  39%|███▉      | 18/46 [00:18<00:25,  1.10it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:1 3 <eos> 2 
batch_y:1 3 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <p

Training epoch 3:  41%|████▏     | 19/46 [00:19<00:24,  1.10it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 7 <eos> 2 
batch_y:2 7 <eos> <pad> 
pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <e

Training epoch 3:  43%|████▎     | 20/46 [00:20<00:23,  1.11it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:7 <eos> 2 2 
batch_y:7 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 7 <eos> 2 
batch_y:2 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pre

Training epoch 3:  46%|████▌     | 21/46 [00:21<00:23,  1.07it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:1 3 <eos> 2 
batch_y:1 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 5 <eos> 2 
batch_y:2 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:- 3 9 <eos> 
batch_y:- 3 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 3:  48%|████▊     | 22/46 [00:22<00:23,  1.01it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:

Training epoch 3:  50%|█████     | 23/46 [00:23<00:23,  1.03s/it]

pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 5 <eos> 2 
batch_y:2 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 4 <eos> 2 
batch_y:1 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 

Training epoch 3:  52%|█████▏    | 24/46 [00:24<00:23,  1.09s/it]

pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 9 <eos> 
batch_y:- 4 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:- 3 4 <eos> 
batch_y:- 3 4 <eos> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:4 8 <eos> 2 
batch_y:4 8 <eos> <pad> 
pred_y:3 8 <eos> 2 
batch_y:3 8 <eos> <pad> 
pred_y:- 3 0 <eos> 
batch_y:- 3

Training epoch 3:  54%|█████▍    | 25/46 [00:25<00:21,  1.03s/it]

pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 5 <eos> 2 
batch_y:4 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 

Training epoch 3:  57%|█████▋    | 26/46 [00:26<00:19,  1.02it/s]

pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:2 6 <eos> 2 
batch_y:2 6 <eos> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:- 3 8 <eos> 
batch_y:- 3 8 <eos> 
pred_y:1 7 <eos> 2 
batch_y:1 7 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:3 <eos> 2 2 
batch_y:3 <eos> <pad> <pad> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pa

Training epoch 3:  59%|█████▊    | 27/46 [00:27<00:18,  1.05it/s]

pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 8 <eos> 
batch_y:- 4 8 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 8 <eos> 2 
batch_y:2 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 7 <eos> 
batch_y:- 4 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <e

Training epoch 3:  61%|██████    | 28/46 [00:28<00:16,  1.07it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 5 <eos> 
batch_y:- 4 5 <eos> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:2 1 <eos> 2 
batch_y:2 1 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 9 <eos> 
batch_y:- 1 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 3:  63%|██████▎   | 29/46 [00:28<00:15,  1.08it/s]

pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 7 <eos> 
batch_y:- 3 7 <eos> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 3 <eos> 2 
batch_y:3 3 <eos> <pad> 
pred_y:2 <e

Training epoch 3:  65%|██████▌   | 30/46 [00:29<00:14,  1.10it/s]

pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 7 <eos> 2 
batch_y:4 7 <eos> <pad> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 7 <eos> 2 
batch_y:2 7 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 3 6 <eos> 
batch_y:- 3 6 <eos> 
pred_y:- 4 9 <eos> 
batch_y:- 4 9 <eos> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos>

Training epoch 3:  67%|██████▋   | 31/46 [00:30<00:13,  1.11it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 7 <eos> 2 
batch_y:3 7 <eos> <pad> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 7 <eos> 2 
batch_y:- 7 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:- 1 6 <eos> 
batch_y:- 1 6 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 

Training epoch 3:  70%|██████▉   | 32/46 [00:31<00:12,  1.12it/s]

pred_y:2 5 <eos> 2 
batch_y:2 5 <eos> <pad> 
pred_y:- 4 3 <eos> 
batch_y:- 4 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 1 <eos> 
batch_y:- 4 1 <eos> 
pred_y:3 3 <eos> 2 
batch_y:3 3 <eos> <pad> 
pred_y:- 2 6 <eos> 
batch_y:- 2 6 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 

Training epoch 3:  72%|███████▏  | 33/46 [00:32<00:11,  1.11it/s]

pred_y:3 5 <eos> 2 
batch_y:3 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 3 <eos> 2 
batch_y:2 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:3 0 <eos> 2 
batch_y:3 0 <eos> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 

Training epoch 3:  74%|███████▍  | 34/46 [00:33<00:10,  1.11it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 5 <eos> 
batch_y:- 1 5 <eos> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 9 <eos> 2 
batch_y:1 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 3 <eos> 2 
batch_y:1 3 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:- 4 5 <eos> 
batch_y:- 4 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
bat

Training epoch 3:  76%|███████▌  | 35/46 [00:34<00:09,  1.12it/s]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 4 <eos> 
batch_y:- 4 4 <eos> 
pred_y:3 3 <eos> 2 
batch_y:3 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 4 <eos> 2 
batch_y:- 4 <eos> <pad> 
pred_y:2 <e

Training epoch 3:  78%|███████▊  | 36/46 [00:35<00:09,  1.03it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 0 <eos> 2 
batch_y:4 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 7 <eos> 
batch_y:- 1 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 1 <eos> 2 
batch_y:1 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 <eos> 2 
batch_y:- 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 

Training epoch 3:  80%|████████  | 37/46 [00:36<00:09,  1.04s/it]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 4 <eos> 2 
batch_y:4 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:8 <eos> 2 2 
batch_y:8 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 9 <eos> 
batch_y:- 1 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <p

Training epoch 3:  83%|████████▎ | 38/46 [00:37<00:08,  1.08s/it]

pred_y:- 8 <eos> 2 
batch_y:- 8 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:3 2 <eos> 2 
batch_y:3 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 8 <eos> 2 
batch_y:4 8 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 3:  85%|████████▍ | 39/46 [00:38<00:07,  1.08s/it]

pred_y:1 0 <eos> 2 
batch_y:1 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:9 <eos> 2 2 
batch_y:9 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 0 <eos> 
batch_y:- 2 0 <eos> 
pre

Training epoch 3:  87%|████████▋ | 40/46 [00:39<00:06,  1.02s/it]

pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:- 3 1 <eos> 
batch_y:- 3 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:3 1 <eos> 2 
batch_y:3 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 0 <eos> 
batch_y:- 1 0 <eos> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 <eos> 2 2 
batch_y:4 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:6 <eos> 2 2 
batch_y:6 <eos> <pad> <pad> 
pred_y:- 4 5 <eos> 
batch_y:- 4 5 <eos> 
pre

Training epoch 3:  89%|████████▉ | 41/46 [00:40<00:04,  1.03it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:1 <eos> 2 2 
batch_y:1 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:- 5 <eos> 2 
batch_y:- 5 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 2 <eos> 2 
batch_y:4 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:- 2 8 <eos> 
batch_y:- 2 8 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:4 9 <eos> 2 
batch_y:4 9 <eos> <pad> 
pred_y:- 1 8 <eos> 
batch_y:- 1 8 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 2 <eos> 
batch_y:- 2 2 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y

Training epoch 3:  91%|█████████▏| 42/46 [00:41<00:03,  1.06it/s]

pred_y:2 2 <eos> 2 
batch_y:2 2 <eos> <pad> 
pred_y:3 4 <eos> 2 
batch_y:3 4 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 5 <eos> 
batch_y:- 2 5 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 1 <eos> 2 
batch_y:- 1 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 3 <eos> 
batch_y:- 3 3 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 3 <eos> 
batch_y:- 3 3 <eos> 
pred_y:- 3 <eos> 2 
batch_y:- 3 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 

Training epoch 3:  93%|█████████▎| 43/46 [00:42<00:02,  1.08it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:5 <eos> 2 2 
batch_y:5 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:- 4 0 <eos> 
batch_y:- 4 0 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 3 <eos> 
batch_y:- 2 3 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 2 4 <eos> 
batch_y:- 2 4 <eos> 
pred_y:- 2 9 <eos> 
batch_y:- 2 9 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 4 <eos> 
batch_y:- 1 4 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 3 

Training epoch 3:  96%|█████████▌| 44/46 [00:43<00:01,  1.09it/s]

pred_y:1 2 <eos> 2 
batch_y:1 2 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 9 <eos> 2 
batch_y:- 9 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:4 6 <eos> 2 
batch_y:4 6 <eos> <pad> 
pred_y:1 5 <eos> 2 
batch_y:1 5 <eos> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 1 <eos> 
batch_y:- 1 1 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:7 <eos> 2 2 
batch_y:7 <eos> <pad> <pad> 

Training epoch 3:  98%|█████████▊| 45/46 [00:44<00:00,  1.09it/s]

pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 1 3 <eos> 
batch_y:- 1 3 <eos> 
pred_y:3 6 <eos> 2 
batch_y:3 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:1 6 <eos> 2 
batch_y:1 6 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 4 <eos> 2 
batch_y:2 4 <eos> <pad> 
pred_y:- 4 6 <eos> 
batch_y:- 4 6 <eos> 
pred_y:0 <eos> 2 2 
batch_y:0 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:- 6 <eos> 2 
batch_y:- 6 <eos> <pad> 
pred_y:- 2 7 <eos> 
batch_y:- 2 7 <eos> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 0 <eos> 2 
batch_y:2 0 <eos> <pad> 
pred_y:2 <eos> 2 2 
batch_y:2 <eos> <pad> <pad> 
pred_y:2 <eos> 2 2 

Training epoch 3: 100%|██████████| 46/46 [00:44<00:00,  1.03it/s]


Training Loss: 0.004310464162312626


Validation: 100%|██████████| 1/1 [00:00<00:00,  3.83it/s]

Validation Loss: 0.1853114366531372





In [70]:
# model = model.to(device)
# model.eval()  # 将模型设置为评估模式
# total_loss = 0.0
# num_batches = 0

# with torch.no_grad():  # 在验证过程中不需要计算梯度
#     for batch_x, batch_y, batch_x_lens, batch_y_lens in tqdm(validation_data_loader, desc="Validation"):
#         optimizer.zero_grad()
#         batch_pred_y = model(batch_x.to(device), batch_x_lens)

#         # 找等号位置
#         equ_pos = []
#         for tensor in batch_x:
#             for idx, value in enumerate(tensor):
#                 if value.item() == char_to_id['=']:
#                     equ_pos.append(idx)

#         tensor_pred_y = torch.empty(batch_y.size(0), batch_y.size(1), 18)
#         for i in range(0, batch_y.size(0)):
#             equ = equ_pos[i]+1
#             for j in range(0, batch_y.size(1)):
#                 for k in range(0, 18):
#                     tensor_pred_y[i][j][k] = batch_pred_y[i][equ + j][k]

#         tensor_pred_y = tensor_pred_y.view(-1, vocab_size)
#         batch_y = batch_y.view(-1).to(device)

#         loss = criterion(tensor_pred_y, batch_y)
#         total_loss += loss.item()
#         num_batches += 1

#     average_loss = total_loss / num_batches
#     print(f"Validation Loss: {average_loss}")


Validation: 100%|██████████| 1/1 [00:00<00:00,  4.32it/s]

Validation Loss: 0.2551415264606476





In [71]:
# # Validation
# model.eval()
# total_val_loss = 0.0
# num_val_batches = 0
# validation_process_bar = tqdm(validation_data_loader, desc="Validation")
# with torch.no_grad():
#     for val_batch_x, val_batch_y, val_batch_x_lens, val_batch_y_lens in validation_process_bar:
#         val_batch_x = val_batch_x.to(device)
#         val_batch_y = val_batch_y.to(device)

#         equ_pos = []
#         for tensor in val_batch_x:
#             for idx, value in enumerate(tensor):
#                 if value.item() == char_to_id['=']:
#                     equ_pos.append(idx)
#         val_batch_pred_y = model(val_batch_x, val_batch_x_lens)
#         tensor_y = torch.zeros_like(val_batch_x)
#         for i in range(0, val_batch_x.size(0)):
#             for j in range(0, val_batch_x.size(1)):
#                 tensor_y[i][j] = torch.argmax(val_batch_pred_y[i][j], 0)
#         tensor_pred_y = torch.empty(val_batch_y.size(0), val_batch_y.size(1), 18)
#         for i in range(0, val_batch_y.size(0)):
#             equ = equ_pos[i]+1
#             for j in range(0, val_batch_y.size(1)):
#                 for k in range(0, 18):
#                     tensor_pred_y[i][j][k] = val_batch_pred_y[i][equ + j][k]

#         val_batch_pred_y = val_batch_pred_y.view(-1, vocab_size)
#         tensor_pred_y = tensor_pred_y.view(-1, vocab_size)
#         val_batch_y = val_batch_y.view(-1).to(device)
#         val_loss = criterion(tensor_pred_y, val_batch_y)

#         total_val_loss += val_loss.item()
#         num_val_batches += 1

#         validation_process_bar.set_postfix(loss=val_loss.item())

# # 计算平均验证损失
# average_val_loss = total_val_loss / num_val_batches
# print(f"Validation Loss: {average_val_loss}")


Validation: 100%|██████████| 1/1 [00:00<00:00,  5.98it/s, loss=0.255]

Validation Loss: 0.25514155626296997



