# Image Captioning

---

<a id='step1'></a>
## Step 1: Training Setup

In [None]:
import torch
import torch.nn as nn
from torchvision import transforms
import sys
sys.path.append('cocoapi/PythonAPI')
from pycocotools.coco import COCO
from captioning.data_loader import get_loader
from captioning.model import EncoderCNN, DecoderRNN
import math



# --- Data loading ------------------------------------------------------
# All three values are forwarded unchanged to get_loader().
batch_size = 256               # samples per batch; also used to derive total_step
vocab_threshold = 4            # presumably a minimum word-count cutoff -- confirm in data_loader
vocab_from_file = True         # presumably reuses a previously saved vocabulary -- confirm in data_loader

# --- Model dimensions --------------------------------------------------
# embed_size feeds both EncoderCNN and DecoderRNN; hidden_size feeds DecoderRNN.
embed_size, hidden_size = 512, 512

# --- Training schedule and logging -------------------------------------
num_epochs = 2                 # number of passes of the outer training loop
save_every = 1                 # write model checkpoints every `save_every` epochs
print_every = 100              # refresh the averaged-loss plot every `print_every` steps
log_file = 'training_log.txt'  # per-step stats are written to this file

# Per-channel mean / std handed to Normalize
# (presumably the ImageNet statistics expected by a pretrained encoder -- confirm).
channel_mean = (0.485, 0.456, 0.406)
channel_std = (0.229, 0.224, 0.225)

# Training-time image pipeline, applied in order:
# resize -> random 224 crop -> random horizontal flip -> tensor -> normalize.
train_steps = [
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
transform_train = transforms.Compose(train_steps)


# Build the training data loader from the settings chosen above.
loader_options = {
    'transform': transform_train,
    'mode': 'train',
    'batch_size': batch_size,
    'vocab_threshold': vocab_threshold,
    'vocab_from_file': vocab_from_file,
}
data_loader = get_loader(**loader_options)

# The vocabulary size is the length of the dataset's vocab object; it is
# passed to DecoderRNN and used to flatten the decoder outputs for the loss.
vocab_size = len(data_loader.dataset.vocab)

# Resolve the compute device once and use it for every module below, so the
# models and the loss can never end up on different devices.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# nn.Module.to() returns the module itself, so construction and placement
# can be chained.
encoder = EncoderCNN(embed_size).to(device)
decoder = DecoderRNN(embed_size, hidden_size, vocab_size).to(device)

# Place the loss on the same device as the models instead of repeating the
# CUDA availability check.
criterion = nn.CrossEntropyLoss().to(device)

# Only the decoder and the encoder's `embed` layer are handed to the
# optimizer (presumably the rest of the encoder is a frozen pretrained
# backbone -- confirm in captioning/model.py).
params = list(decoder.parameters()) + list(encoder.embed.parameters())
optimizer = torch.optim.Adam(params=params, lr=0.001)

# Steps per epoch: number of captions divided by the batch size, rounded up.
total_step = math.ceil(len(data_loader.dataset.caption_lengths) / data_loader.batch_sampler.batch_size)

Vocabulary successfully loaded from vocab.pkl file!
loading annotations into memory...
Done (t=0.31s)
creating index...
index created!
Obtaining caption lengths...


100%|██████████| 414113/414113 [00:09<00:00, 41841.77it/s]


<a id='step2'></a>
## Step 2: Train your Model

In [None]:
!pip install livelossplot --quiet

In [None]:
import torch.utils.data as data
import numpy as np
import os
import requests
import time
import sys 
from livelossplot import PlotLosses

# Training loop.
# Per-step statistics are written to `log_file`, and the loss averaged over
# the last `print_every` steps is plotted live with livelossplot.
# Encoder/decoder weights are checkpointed every `save_every` epochs.

# Create the checkpoint directory up front so torch.save cannot fail on a
# missing folder after a whole epoch of training.
model_dir = './models'
os.makedirs(model_dir, exist_ok=True)

# NOTE: this message hard-codes "100"; keep it in sync with `print_every`.
print("Training started - Loss function info will be reported every 100's steps")
liveloss = PlotLosses()
logs = {}

# The context manager guarantees the log file is flushed and closed even if
# training raises or is interrupted from the notebook.
with open(log_file, 'w') as f:
    for epoch in range(1, num_epochs+1):
        running_loss = 0.0
        for i_step in range(1, total_step+1):
            # Draw a fresh set of training indices and install them as the
            # sampler before pulling one batch from the loader (presumably
            # the indices share a caption length so the batch can be stacked
            # -- confirm in data_loader).
            indices = data_loader.dataset.get_train_indices()
            new_sampler = data.sampler.SubsetRandomSampler(indices=indices)
            data_loader.batch_sampler.sampler = new_sampler
            images, captions = next(iter(data_loader))
            images = images.to(device)
            captions = captions.to(device)

            # Clear gradients accumulated by the previous step.
            decoder.zero_grad()
            encoder.zero_grad()

            # Forward pass, then loss over the flattened token predictions.
            features = encoder(images)
            outputs = decoder(features, captions)
            loss = criterion(outputs.view(-1, vocab_size), captions.view(-1))

            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

            # Read the scalar once; each .item() call copies from the device.
            loss_value = loss.item()
            stats = 'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f' % (epoch, num_epochs, i_step, total_step, loss_value, np.exp(loss_value))

            # Overwrite the same console line and append to the log file.
            print('\r' + stats, end="")
            sys.stdout.flush()
            f.write(stats + '\n')
            f.flush()

            # Update the live plot with the mean loss of the last window.
            running_loss += loss_value
            if i_step % print_every == 0:
                avg_loss = running_loss/print_every
                logs['Training Loss'] = avg_loss
                running_loss = 0.0
                liveloss.update(logs)
                liveloss.send()

        # Checkpoint both networks at the configured epoch interval.
        if epoch % save_every == 0:
            torch.save(decoder.state_dict(), os.path.join(model_dir, 'decoder-%d.pkl' % epoch))
            torch.save(encoder.state_dict(), os.path.join(model_dir, 'encoder-%d.pkl' % epoch))

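Complete training log (captured from a run configured with `num_epochs = 3`, as the `Epoch [1/3]` prefix shows; the setup cell above currently sets `num_epochs = 2`):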

~~~~
Epoch [1/3], Step [1/1618], Loss: 9.1976, Perplexity: 9873.7753
Epoch [1/3], Step [2/1618], Loss: 9.0299, Perplexity: 8349.3557
Epoch [1/3], Step [3/1618], Loss: 8.8347, Perplexity: 6868.6935
Epoch [1/3], Step [4/1618], Loss: 8.5840, Perplexity: 5345.5191
Epoch [1/3], Step [5/1618], Loss: 8.2719, Perplexity: 3912.2418
Epoch [1/3], Step [6/1618], Loss: 7.7041, Perplexity: 2217.3185
Epoch [1/3], Step [7/1618], Loss: 7.1511, Perplexity: 1275.4640
Epoch [1/3], Step [8/1618], Loss: 6.4671, Perplexity: 643.6340
Epoch [1/3], Step [9/1618], Loss: 6.1721, Perplexity: 479.2062
Epoch [1/3], Step [10/1618], Loss: 5.6683, Perplexity: 289.5430
Epoch [1/3], Step [11/1618], Loss: 5.4118, Perplexity: 224.0427
Epoch [1/3], Step [12/1618], Loss: 5.3785, Perplexity: 216.6916
Epoch [1/3], Step [13/1618], Loss: 5.3182, Perplexity: 204.0140
Epoch [1/3], Step [14/1618], Loss: 5.2699, Perplexity: 194.3999
Epoch [1/3], Step [15/1618], Loss: 5.4076, Perplexity: 223.0845
Epoch [1/3], Step [16/1618], Loss: 5.3592, Perplexity: 212.5527
Epoch [1/3], Step [17/1618], Loss: 5.3844, Perplexity: 217.9769
Epoch [1/3], Step [18/1618], Loss: 5.2602, Perplexity: 192.5202
Epoch [1/3], Step [19/1618], Loss: 5.1611, Perplexity: 174.3596
Epoch [1/3], Step [20/1618], Loss: 5.1253, Perplexity: 168.2256
Epoch [1/3], Step [21/1618], Loss: 4.9696, Perplexity: 143.9730
Epoch [1/3], Step [22/1618], Loss: 4.9889, Perplexity: 146.7793
Epoch [1/3], Step [23/1618], Loss: 4.9321, Perplexity: 138.6695
Epoch [1/3], Step [24/1618], Loss: 4.8513, Perplexity: 127.9015
Epoch [1/3], Step [25/1618], Loss: 4.8652, Perplexity: 129.6937
Epoch [1/3], Step [26/1618], Loss: 4.8068, Perplexity: 122.3402
Epoch [1/3], Step [27/1618], Loss: 4.7937, Perplexity: 120.7459
Epoch [1/3], Step [28/1618], Loss: 4.7564, Perplexity: 116.3308
Epoch [1/3], Step [29/1618], Loss: 4.6980, Perplexity: 109.7276
Epoch [1/3], Step [30/1618], Loss: 4.7750, Perplexity: 118.5099
Epoch [1/3], Step [31/1618], Loss: 4.8000, Perplexity: 121.5157
Epoch [1/3], Step [32/1618], Loss: 4.5449, Perplexity: 94.1557
Epoch [1/3], Step [33/1618], Loss: 5.0498, Perplexity: 155.9870
Epoch [1/3], Step [34/1618], Loss: 4.6708, Perplexity: 106.7884
Epoch [1/3], Step [35/1618], Loss: 4.4447, Perplexity: 85.1761
Epoch [1/3], Step [36/1618], Loss: 4.5147, Perplexity: 91.3476
Epoch [1/3], Step [37/1618], Loss: 4.3895, Perplexity: 80.6020
Epoch [1/3], Step [38/1618], Loss: 4.3669, Perplexity: 78.7987
Epoch [1/3], Step [39/1618], Loss: 4.3649, Perplexity: 78.6412
Epoch [1/3], Step [40/1618], Loss: 4.4876, Perplexity: 88.9082
Epoch [1/3], Step [41/1618], Loss: 4.3144, Perplexity: 74.7703
Epoch [1/3], Step [42/1618], Loss: 4.3587, Perplexity: 78.1533
Epoch [1/3], Step [43/1618], Loss: 4.3954, Perplexity: 81.0764
Epoch [1/3], Step [44/1618], Loss: 4.2049, Perplexity: 67.0143
Epoch [1/3], Step [45/1618], Loss: 4.0972, Perplexity: 60.1698
Epoch [1/3], Step [46/1618], Loss: 4.3898, Perplexity: 80.6253
Epoch [1/3], Step [47/1618], Loss: 4.1755, Perplexity: 65.0692
Epoch [1/3], Step [48/1618], Loss: 4.1347, Perplexity: 62.4730
Epoch [1/3], Step [49/1618], Loss: 4.0517, Perplexity: 57.4954
Epoch [1/3], Step [50/1618], Loss: 4.0793, Perplexity: 59.1068
Epoch [1/3], Step [51/1618], Loss: 4.0329, Perplexity: 56.4249
Epoch [1/3], Step [52/1618], Loss: 3.9309, Perplexity: 50.9536
Epoch [1/3], Step [53/1618], Loss: 4.0564, Perplexity: 57.7648
Epoch [1/3], Step [54/1618], Loss: 3.9995, Perplexity: 54.5696
Epoch [1/3], Step [55/1618], Loss: 4.0436, Perplexity: 57.0286
Epoch [1/3], Step [56/1618], Loss: 4.5612, Perplexity: 95.6953
Epoch [1/3], Step [57/1618], Loss: 3.9677, Perplexity: 52.8619
Epoch [1/3], Step [58/1618], Loss: 3.9296, Perplexity: 50.8878
Epoch [1/3], Step [59/1618], Loss: 4.2334, Perplexity: 68.9535
Epoch [1/3], Step [60/1618], Loss: 3.8334, Perplexity: 46.2200
Epoch [1/3], Step [61/1618], Loss: 3.8817, Perplexity: 48.5080
Epoch [1/3], Step [62/1618], Loss: 3.9092, Perplexity: 49.8585
Epoch [1/3], Step [63/1618], Loss: 3.8360, Perplexity: 46.3376
Epoch [1/3], Step [64/1618], Loss: 3.9089, Perplexity: 49.8416
Epoch [1/3], Step [65/1618], Loss: 3.8325, Perplexity: 46.1775
Epoch [1/3], Step [66/1618], Loss: 4.2029, Perplexity: 66.8778
Epoch [1/3], Step [67/1618], Loss: 3.8295, Perplexity: 46.0387
Epoch [1/3], Step [68/1618], Loss: 3.6291, Perplexity: 37.6801
Epoch [1/3], Step [69/1618], Loss: 3.8690, Perplexity: 47.8931
Epoch [1/3], Step [70/1618], Loss: 3.7886, Perplexity: 44.1946
Epoch [1/3], Step [71/1618], Loss: 3.7287, Perplexity: 41.6229
Epoch [1/3], Step [72/1618], Loss: 4.0904, Perplexity: 59.7622
Epoch [1/3], Step [73/1618], Loss: 3.7227, Perplexity: 41.3756
Epoch [1/3], Step [74/1618], Loss: 3.8019, Perplexity: 44.7868
Epoch [1/3], Step [75/1618], Loss: 3.7870, Perplexity: 44.1229
Epoch [1/3], Step [76/1618], Loss: 3.6027, Perplexity: 36.6976
Epoch [1/3], Step [77/1618], Loss: 3.7672, Perplexity: 43.2607
Epoch [1/3], Step [78/1618], Loss: 4.0715, Perplexity: 58.6445
Epoch [1/3], Step [79/1618], Loss: 3.6712, Perplexity: 39.3009
Epoch [1/3], Step [80/1618], Loss: 4.0514, Perplexity: 57.4753
Epoch [1/3], Step [81/1618], Loss: 3.6911, Perplexity: 40.0877
Epoch [1/3], Step [82/1618], Loss: 3.7772, Perplexity: 43.6921
Epoch [1/3], Step [83/1618], Loss: 3.8740, Perplexity: 48.1348
Epoch [1/3], Step [84/1618], Loss: 3.8417, Perplexity: 46.6036
Epoch [1/3], Step [85/1618], Loss: 3.7898, Perplexity: 44.2494
Epoch [1/3], Step [86/1618], Loss: 3.7089, Perplexity: 40.8071
Epoch [1/3], Step [87/1618], Loss: 3.6874, Perplexity: 39.9392
Epoch [1/3], Step [88/1618], Loss: 3.6356, Perplexity: 37.9251
Epoch [1/3], Step [89/1618], Loss: 3.7067, Perplexity: 40.7182
Epoch [1/3], Step [90/1618], Loss: 3.9437, Perplexity: 51.6083
Epoch [1/3], Step [91/1618], Loss: 3.6798, Perplexity: 39.6394
Epoch [1/3], Step [92/1618], Loss: 3.6599, Perplexity: 38.8577
Epoch [1/3], Step [93/1618], Loss: 3.7897, Perplexity: 44.2436
Epoch [1/3], Step [94/1618], Loss: 3.5762, Perplexity: 35.7385
Epoch [1/3], Step [95/1618], Loss: 3.6970, Perplexity: 40.3247
Epoch [1/3], Step [96/1618], Loss: 3.5564, Perplexity: 35.0368
Epoch [1/3], Step [97/1618], Loss: 3.4297, Perplexity: 30.8661
Epoch [1/3], Step [98/1618], Loss: 4.3466, Perplexity: 77.2145
Epoch [1/3], Step [99/1618], Loss: 3.4712, Perplexity: 32.1761
Epoch [1/3], Step [100/1618], Loss: 3.5413, Perplexity: 34.5116
Epoch [1/3], Step [101/1618], Loss: 3.4118, Perplexity: 30.3206
Epoch [1/3], Step [102/1618], Loss: 3.6101, Perplexity: 36.9685
Epoch [1/3], Step [103/1618], Loss: 3.6910, Perplexity: 40.0866
Epoch [1/3], Step [104/1618], Loss: 4.1286, Perplexity: 62.0916
Epoch [1/3], Step [105/1618], Loss: 3.5755, Perplexity: 35.7138
Epoch [1/3], Step [106/1618], Loss: 3.4834, Perplexity: 32.5704
Epoch [1/3], Step [107/1618], Loss: 3.4854, Perplexity: 32.6355
Epoch [1/3], Step [108/1618], Loss: 3.4782, Perplexity: 32.4020
Epoch [1/3], Step [109/1618], Loss: 3.4229, Perplexity: 30.6567
Epoch [1/3], Step [110/1618], Loss: 3.3687, Perplexity: 29.0406
Epoch [1/3], Step [111/1618], Loss: 3.5714, Perplexity: 35.5678
Epoch [1/3], Step [112/1618], Loss: 3.5754, Perplexity: 35.7089
Epoch [1/3], Step [113/1618], Loss: 3.3780, Perplexity: 29.3135
Epoch [1/3], Step [114/1618], Loss: 3.6568, Perplexity: 38.7368
Epoch [1/3], Step [115/1618], Loss: 3.3949, Perplexity: 29.8104
Epoch [1/3], Step [116/1618], Loss: 3.4931, Perplexity: 32.8877
Epoch [1/3], Step [117/1618], Loss: 3.4427, Perplexity: 31.2704
Epoch [1/3], Step [118/1618], Loss: 3.3991, Perplexity: 29.9381
Epoch [1/3], Step [119/1618], Loss: 3.4088, Perplexity: 30.2300
Epoch [1/3], Step [120/1618], Loss: 3.3511, Perplexity: 28.5343
Epoch [1/3], Step [121/1618], Loss: 3.5059, Perplexity: 33.3130
Epoch [1/3], Step [122/1618], Loss: 3.5621, Perplexity: 35.2380
Epoch [1/3], Step [123/1618], Loss: 3.8372, Perplexity: 46.3974
Epoch [1/3], Step [124/1618], Loss: 3.5575, Perplexity: 35.0748
Epoch [1/3], Step [125/1618], Loss: 3.3972, Perplexity: 29.8802
Epoch [1/3], Step [126/1618], Loss: 3.4266, Perplexity: 30.7728
Epoch [1/3], Step [127/1618], Loss: 3.4202, Perplexity: 30.5765
Epoch [1/3], Step [128/1618], Loss: 3.4199, Perplexity: 30.5676
Epoch [1/3], Step [129/1618], Loss: 4.1583, Perplexity: 63.9631
Epoch [1/3], Step [130/1618], Loss: 3.4494, Perplexity: 31.4822
Epoch [1/3], Step [131/1618], Loss: 3.7918, Perplexity: 44.3350
Epoch [1/3], Step [132/1618], Loss: 3.3980, Perplexity: 29.9040
Epoch [1/3], Step [133/1618], Loss: 3.3572, Perplexity: 28.7075
Epoch [1/3], Step [134/1618], Loss: 3.3778, Perplexity: 29.3068
Epoch [1/3], Step [135/1618], Loss: 3.3564, Perplexity: 28.6850
Epoch [1/3], Step [136/1618], Loss: 3.3109, Perplexity: 27.4090
Epoch [1/3], Step [137/1618], Loss: 3.4339, Perplexity: 30.9969
Epoch [1/3], Step [138/1618], Loss: 3.3942, Perplexity: 29.7917
Epoch [1/3], Step [139/1618], Loss: 3.3693, Perplexity: 29.0595
Epoch [1/3], Step [140/1618], Loss: 3.4137, Perplexity: 30.3789
Epoch [1/3], Step [141/1618], Loss: 3.2759, Perplexity: 26.4662
Epoch [1/3], Step [142/1618], Loss: 3.3322, Perplexity: 27.9991
Epoch [1/3], Step [143/1618], Loss: 3.7857, Perplexity: 44.0675
Epoch [1/3], Step [144/1618], Loss: 3.3600, Perplexity: 28.7887
Epoch [1/3], Step [145/1618], Loss: 3.3375, Perplexity: 28.1495
Epoch [1/3], Step [146/1618], Loss: 3.4707, Perplexity: 32.1602
Epoch [1/3], Step [147/1618], Loss: 3.4212, Perplexity: 30.6073
Epoch [1/3], Step [148/1618], Loss: 3.3086, Perplexity: 27.3454
Epoch [1/3], Step [149/1618], Loss: 3.3141, Perplexity: 27.4989
Epoch [1/3], Step [150/1618], Loss: 3.3700, Perplexity: 29.0792
Epoch [1/3], Step [151/1618], Loss: 3.2687, Perplexity: 26.2759
Epoch [1/3], Step [152/1618], Loss: 3.6364, Perplexity: 37.9563
Epoch [1/3], Step [153/1618], Loss: 3.5126, Perplexity: 33.5354
Epoch [1/3], Step [154/1618], Loss: 3.2916, Perplexity: 26.8857
Epoch [1/3], Step [155/1618], Loss: 3.3712, Perplexity: 29.1139
Epoch [1/3], Step [156/1618], Loss: 3.3585, Perplexity: 28.7461
Epoch [1/3], Step [157/1618], Loss: 3.1357, Perplexity: 23.0053
Epoch [1/3], Step [158/1618], Loss: 3.9316, Perplexity: 50.9904
Epoch [1/3], Step [159/1618], Loss: 4.7949, Perplexity: 120.8970
Epoch [1/3], Step [160/1618], Loss: 3.5205, Perplexity: 33.8000
Epoch [1/3], Step [161/1618], Loss: 3.4974, Perplexity: 33.0304
Epoch [1/3], Step [162/1618], Loss: 3.2325, Perplexity: 25.3421
Epoch [1/3], Step [163/1618], Loss: 3.3857, Perplexity: 29.5386
Epoch [1/3], Step [164/1618], Loss: 3.2366, Perplexity: 25.4463
Epoch [1/3], Step [165/1618], Loss: 3.2912, Perplexity: 26.8764
Epoch [1/3], Step [166/1618], Loss: 3.4831, Perplexity: 32.5613
Epoch [1/3], Step [167/1618], Loss: 3.5989, Perplexity: 36.5580
Epoch [1/3], Step [168/1618], Loss: 3.3060, Perplexity: 27.2746
Epoch [1/3], Step [169/1618], Loss: 3.1692, Perplexity: 23.7890
Epoch [1/3], Step [170/1618], Loss: 3.2373, Perplexity: 25.4645
Epoch [1/3], Step [171/1618], Loss: 3.2610, Perplexity: 26.0767
Epoch [1/3], Step [172/1618], Loss: 3.7748, Perplexity: 43.5895
Epoch [1/3], Step [173/1618], Loss: 3.1425, Perplexity: 23.1628
Epoch [1/3], Step [174/1618], Loss: 3.2013, Perplexity: 24.5653
Epoch [1/3], Step [175/1618], Loss: 3.3602, Perplexity: 28.7937
Epoch [1/3], Step [176/1618], Loss: 3.1535, Perplexity: 23.4188
Epoch [1/3], Step [177/1618], Loss: 3.3090, Perplexity: 27.3584
Epoch [1/3], Step [178/1618], Loss: 3.5115, Perplexity: 33.4991
Epoch [1/3], Step [179/1618], Loss: 3.6668, Perplexity: 39.1273
Epoch [1/3], Step [180/1618], Loss: 3.2432, Perplexity: 25.6147
Epoch [1/3], Step [181/1618], Loss: 3.5211, Perplexity: 33.8230
Epoch [1/3], Step [182/1618], Loss: 3.2698, Perplexity: 26.3056
Epoch [1/3], Step [183/1618], Loss: 3.3212, Perplexity: 27.6928
Epoch [1/3], Step [184/1618], Loss: 3.2461, Perplexity: 25.6888
Epoch [1/3], Step [185/1618], Loss: 3.2449, Perplexity: 25.6584
Epoch [1/3], Step [186/1618], Loss: 3.2556, Perplexity: 25.9351
Epoch [1/3], Step [187/1618], Loss: 3.2389, Perplexity: 25.5054
Epoch [1/3], Step [188/1618], Loss: 3.1983, Perplexity: 24.4912
Epoch [1/3], Step [189/1618], Loss: 3.2530, Perplexity: 25.8686
Epoch [1/3], Step [190/1618], Loss: 3.5449, Perplexity: 34.6370
Epoch [1/3], Step [191/1618], Loss: 3.1072, Perplexity: 22.3586
Epoch [1/3], Step [192/1618], Loss: 3.1936, Perplexity: 24.3764
Epoch [1/3], Step [193/1618], Loss: 3.4500, Perplexity: 31.5001
Epoch [1/3], Step [194/1618], Loss: 3.8485, Perplexity: 46.9248
Epoch [1/3], Step [195/1618], Loss: 3.1484, Perplexity: 23.2997
Epoch [1/3], Step [196/1618], Loss: 3.2332, Perplexity: 25.3603
Epoch [1/3], Step [197/1618], Loss: 3.5776, Perplexity: 35.7863
Epoch [1/3], Step [198/1618], Loss: 3.3533, Perplexity: 28.5974
Epoch [1/3], Step [199/1618], Loss: 3.1967, Perplexity: 24.4512
Epoch [1/3], Step [200/1618], Loss: 3.2438, Perplexity: 25.6319
Epoch [1/3], Step [201/1618], Loss: 3.6917, Perplexity: 40.1134
Epoch [1/3], Step [202/1618], Loss: 3.1019, Perplexity: 22.2395
Epoch [1/3], Step [203/1618], Loss: 3.2710, Perplexity: 26.3376
Epoch [1/3], Step [204/1618], Loss: 3.2476, Perplexity: 25.7286
Epoch [1/3], Step [205/1618], Loss: 3.3339, Perplexity: 28.0467
Epoch [1/3], Step [206/1618], Loss: 3.4544, Perplexity: 31.6402
Epoch [1/3], Step [207/1618], Loss: 3.1246, Perplexity: 22.7516
Epoch [1/3], Step [208/1618], Loss: 3.2518, Perplexity: 25.8365
Epoch [1/3], Step [209/1618], Loss: 3.3355, Perplexity: 28.0920
Epoch [1/3], Step [210/1618], Loss: 3.2323, Perplexity: 25.3377
Epoch [1/3], Step [211/1618], Loss: 3.2828, Perplexity: 26.6492
Epoch [1/3], Step [212/1618], Loss: 3.3293, Perplexity: 27.9184
Epoch [1/3], Step [213/1618], Loss: 3.2318, Perplexity: 25.3251
Epoch [1/3], Step [214/1618], Loss: 3.2006, Perplexity: 24.5468
Epoch [1/3], Step [215/1618], Loss: 3.5506, Perplexity: 34.8343
Epoch [1/3], Step [216/1618], Loss: 3.1230, Perplexity: 22.7137
Epoch [1/3], Step [217/1618], Loss: 3.0746, Perplexity: 21.6409
Epoch [1/3], Step [218/1618], Loss: 3.0907, Perplexity: 21.9915
Epoch [1/3], Step [219/1618], Loss: 3.0310, Perplexity: 20.7171
Epoch [1/3], Step [220/1618], Loss: 3.4951, Perplexity: 32.9529
Epoch [1/3], Step [221/1618], Loss: 3.0602, Perplexity: 21.3326
Epoch [1/3], Step [222/1618], Loss: 3.1211, Perplexity: 22.6714
Epoch [1/3], Step [223/1618], Loss: 3.4529, Perplexity: 31.5922
Epoch [1/3], Step [224/1618], Loss: 3.5601, Perplexity: 35.1676
Epoch [1/3], Step [225/1618], Loss: 2.9875, Perplexity: 19.8368
Epoch [1/3], Step [226/1618], Loss: 3.0501, Perplexity: 21.1164
Epoch [1/3], Step [227/1618], Loss: 3.1431, Perplexity: 23.1745
Epoch [1/3], Step [228/1618], Loss: 3.0731, Perplexity: 21.6085
Epoch [1/3], Step [229/1618], Loss: 3.0550, Perplexity: 21.2216
Epoch [1/3], Step [230/1618], Loss: 3.0931, Perplexity: 22.0456
Epoch [1/3], Step [231/1618], Loss: 3.4544, Perplexity: 31.6389
Epoch [1/3], Step [232/1618], Loss: 3.3856, Perplexity: 29.5349
Epoch [1/3], Step [233/1618], Loss: 2.9918, Perplexity: 19.9214
Epoch [1/3], Step [234/1618], Loss: 3.3825, Perplexity: 29.4445
Epoch [1/3], Step [235/1618], Loss: 2.9556, Perplexity: 19.2138
Epoch [1/3], Step [236/1618], Loss: 3.0061, Perplexity: 20.2091
Epoch [1/3], Step [237/1618], Loss: 3.0130, Perplexity: 20.3491
Epoch [1/3], Step [238/1618], Loss: 3.1424, Perplexity: 23.1588
Epoch [1/3], Step [239/1618], Loss: 2.9818, Perplexity: 19.7225
Epoch [1/3], Step [240/1618], Loss: 3.0572, Perplexity: 21.2677
Epoch [1/3], Step [241/1618], Loss: 3.0652, Perplexity: 21.4385
Epoch [1/3], Step [242/1618], Loss: 3.0271, Perplexity: 20.6382
Epoch [1/3], Step [243/1618], Loss: 3.0780, Perplexity: 21.7141
Epoch [1/3], Step [244/1618], Loss: 3.0058, Perplexity: 20.2023
Epoch [1/3], Step [245/1618], Loss: 3.2375, Perplexity: 25.4700
Epoch [1/3], Step [246/1618], Loss: 3.0642, Perplexity: 21.4174
Epoch [1/3], Step [247/1618], Loss: 3.0600, Perplexity: 21.3277
Epoch [1/3], Step [248/1618], Loss: 2.8785, Perplexity: 17.7881
Epoch [1/3], Step [249/1618], Loss: 3.0417, Perplexity: 20.9419
Epoch [1/3], Step [250/1618], Loss: 3.1163, Perplexity: 22.5617
Epoch [1/3], Step [251/1618], Loss: 3.1855, Perplexity: 24.1791
Epoch [1/3], Step [252/1618], Loss: 2.9302, Perplexity: 18.7320
Epoch [1/3], Step [253/1618], Loss: 2.8515, Perplexity: 17.3130
Epoch [1/3], Step [254/1618], Loss: 3.2569, Perplexity: 25.9679
Epoch [1/3], Step [255/1618], Loss: 3.0773, Perplexity: 21.7008
Epoch [1/3], Step [256/1618], Loss: 2.9248, Perplexity: 18.6306
Epoch [1/3], Step [257/1618], Loss: 3.0067, Perplexity: 20.2208
Epoch [1/3], Step [258/1618], Loss: 3.0455, Perplexity: 21.0206
Epoch [1/3], Step [259/1618], Loss: 3.0218, Perplexity: 20.5291
Epoch [1/3], Step [260/1618], Loss: 3.1655, Perplexity: 23.7004
Epoch [1/3], Step [261/1618], Loss: 2.9461, Perplexity: 19.0320
Epoch [1/3], Step [262/1618], Loss: 3.0634, Perplexity: 21.3997
Epoch [1/3], Step [263/1618], Loss: 3.0296, Perplexity: 20.6895
Epoch [1/3], Step [264/1618], Loss: 3.0719, Perplexity: 21.5835
Epoch [1/3], Step [265/1618], Loss: 2.9541, Perplexity: 19.1849
Epoch [1/3], Step [266/1618], Loss: 3.1066, Perplexity: 22.3444
Epoch [1/3], Step [267/1618], Loss: 2.8578, Perplexity: 17.4231
Epoch [1/3], Step [268/1618], Loss: 2.8956, Perplexity: 18.0939
Epoch [1/3], Step [269/1618], Loss: 2.9739, Perplexity: 19.5674
Epoch [1/3], Step [270/1618], Loss: 2.9688, Perplexity: 19.4676
Epoch [1/3], Step [271/1618], Loss: 2.9572, Perplexity: 19.2442
Epoch [1/3], Step [272/1618], Loss: 2.9173, Perplexity: 18.4912
Epoch [1/3], Step [273/1618], Loss: 2.9237, Perplexity: 18.6108
Epoch [1/3], Step [274/1618], Loss: 3.0995, Perplexity: 22.1877
Epoch [1/3], Step [275/1618], Loss: 2.9860, Perplexity: 19.8073
Epoch [1/3], Step [276/1618], Loss: 2.9338, Perplexity: 18.7992
Epoch [1/3], Step [277/1618], Loss: 4.8672, Perplexity: 129.9535
Epoch [1/3], Step [278/1618], Loss: 2.9104, Perplexity: 18.3635
Epoch [1/3], Step [279/1618], Loss: 2.9281, Perplexity: 18.6912
Epoch [1/3], Step [280/1618], Loss: 3.2690, Perplexity: 26.2849
Epoch [1/3], Step [281/1618], Loss: 3.1251, Perplexity: 22.7628
Epoch [1/3], Step [282/1618], Loss: 2.9085, Perplexity: 18.3287
Epoch [1/3], Step [283/1618], Loss: 2.9433, Perplexity: 18.9779
Epoch [1/3], Step [284/1618], Loss: 2.9888, Perplexity: 19.8611
Epoch [1/3], Step [285/1618], Loss: 2.9831, Perplexity: 19.7488
Epoch [1/3], Step [286/1618], Loss: 2.9043, Perplexity: 18.2518
Epoch [1/3], Step [287/1618], Loss: 2.9236, Perplexity: 18.6079
Epoch [1/3], Step [288/1618], Loss: 3.0028, Perplexity: 20.1422
Epoch [1/3], Step [289/1618], Loss: 2.9578, Perplexity: 19.2550
Epoch [1/3], Step [290/1618], Loss: 2.9290, Perplexity: 18.7083
Epoch [1/3], Step [291/1618], Loss: 2.9596, Perplexity: 19.2896
Epoch [1/3], Step [292/1618], Loss: 2.9792, Perplexity: 19.6717
Epoch [1/3], Step [293/1618], Loss: 2.9858, Perplexity: 19.8033
Epoch [1/3], Step [294/1618], Loss: 2.8667, Perplexity: 17.5781
Epoch [1/3], Step [295/1618], Loss: 2.9979, Perplexity: 20.0431
Epoch [1/3], Step [296/1618], Loss: 2.8711, Perplexity: 17.6567
Epoch [1/3], Step [297/1618], Loss: 3.0117, Perplexity: 20.3212
Epoch [1/3], Step [298/1618], Loss: 2.8912, Perplexity: 18.0147
Epoch [1/3], Step [299/1618], Loss: 2.9304, Perplexity: 18.7347
Epoch [1/3], Step [300/1618], Loss: 2.8320, Perplexity: 16.9801
Epoch [1/3], Step [301/1618], Loss: 2.7523, Perplexity: 15.6788
Epoch [1/3], Step [302/1618], Loss: 3.0626, Perplexity: 21.3841
Epoch [1/3], Step [303/1618], Loss: 2.9327, Perplexity: 18.7778
Epoch [1/3], Step [304/1618], Loss: 2.8621, Perplexity: 17.4976
Epoch [1/3], Step [305/1618], Loss: 2.9117, Perplexity: 18.3883
Epoch [1/3], Step [306/1618], Loss: 3.3339, Perplexity: 28.0476
Epoch [1/3], Step [307/1618], Loss: 2.8914, Perplexity: 18.0192
Epoch [1/3], Step [308/1618], Loss: 3.2363, Perplexity: 25.4403
Epoch [1/3], Step [309/1618], Loss: 2.9181, Perplexity: 18.5054
Epoch [1/3], Step [310/1618], Loss: 2.9909, Perplexity: 19.9036
Epoch [1/3], Step [311/1618], Loss: 3.1262, Perplexity: 22.7863
Epoch [1/3], Step [312/1618], Loss: 2.9165, Perplexity: 18.4765
Epoch [1/3], Step [313/1618], Loss: 3.1070, Perplexity: 22.3543
Epoch [1/3], Step [314/1618], Loss: 2.8393, Perplexity: 17.1041
Epoch [1/3], Step [315/1618], Loss: 3.0066, Perplexity: 20.2194
Epoch [1/3], Step [316/1618], Loss: 2.7963, Perplexity: 16.3838
Epoch [1/3], Step [317/1618], Loss: 2.7948, Perplexity: 16.3593
Epoch [1/3], Step [318/1618], Loss: 3.0338, Perplexity: 20.7770
Epoch [1/3], Step [319/1618], Loss: 2.9360, Perplexity: 18.8401
Epoch [1/3], Step [320/1618], Loss: 2.9012, Perplexity: 18.1953
Epoch [1/3], Step [321/1618], Loss: 2.9213, Perplexity: 18.5651
Epoch [1/3], Step [322/1618], Loss: 2.8510, Perplexity: 17.3044
Epoch [1/3], Step [323/1618], Loss: 2.9234, Perplexity: 18.6050
Epoch [1/3], Step [324/1618], Loss: 2.7751, Perplexity: 16.0404
Epoch [1/3], Step [325/1618], Loss: 2.8864, Perplexity: 17.9292
Epoch [1/3], Step [326/1618], Loss: 2.8170, Perplexity: 16.7262
Epoch [1/3], Step [327/1618], Loss: 2.9849, Perplexity: 19.7843
Epoch [1/3], Step [328/1618], Loss: 2.7780, Perplexity: 16.0861
Epoch [1/3], Step [329/1618], Loss: 2.8299, Perplexity: 16.9439
Epoch [1/3], Step [330/1618], Loss: 2.8046, Perplexity: 16.5206
Epoch [1/3], Step [331/1618], Loss: 2.8341, Perplexity: 17.0159
Epoch [1/3], Step [332/1618], Loss: 2.7361, Perplexity: 15.4265
Epoch [1/3], Step [333/1618], Loss: 2.6915, Perplexity: 14.7540
Epoch [1/3], Step [334/1618], Loss: 3.2347, Perplexity: 25.3994
Epoch [1/3], Step [335/1618], Loss: 3.6254, Perplexity: 37.5383
Epoch [1/3], Step [336/1618], Loss: 3.3739, Perplexity: 29.1929
Epoch [1/3], Step [337/1618], Loss: 2.8859, Perplexity: 17.9203
Epoch [1/3], Step [338/1618], Loss: 3.3122, Perplexity: 27.4465
Epoch [1/3], Step [339/1618], Loss: 2.8361, Perplexity: 17.0496
Epoch [1/3], Step [340/1618], Loss: 2.9962, Perplexity: 20.0091
Epoch [1/3], Step [341/1618], Loss: 2.6926, Perplexity: 14.7703
Epoch [1/3], Step [342/1618], Loss: 2.7943, Perplexity: 16.3519
Epoch [1/3], Step [343/1618], Loss: 2.8693, Perplexity: 17.6243
Epoch [1/3], Step [344/1618], Loss: 2.9460, Perplexity: 19.0288
Epoch [1/3], Step [345/1618], Loss: 2.8210, Perplexity: 16.7939
Epoch [1/3], Step [346/1618], Loss: 3.3847, Perplexity: 29.5093
Epoch [1/3], Step [347/1618], Loss: 2.8898, Perplexity: 17.9895
Epoch [1/3], Step [348/1618], Loss: 2.7603, Perplexity: 15.8043
Epoch [1/3], Step [349/1618], Loss: 2.9188, Perplexity: 18.5188
Epoch [1/3], Step [350/1618], Loss: 2.8459, Perplexity: 17.2168
Epoch [1/3], Step [351/1618], Loss: 2.9358, Perplexity: 18.8369
Epoch [1/3], Step [352/1618], Loss: 2.9663, Perplexity: 19.4208
Epoch [1/3], Step [353/1618], Loss: 2.6954, Perplexity: 14.8108
Epoch [1/3], Step [354/1618], Loss: 3.2657, Perplexity: 26.1994
Epoch [1/3], Step [355/1618], Loss: 2.8388, Perplexity: 17.0957
Epoch [1/3], Step [356/1618], Loss: 3.0097, Perplexity: 20.2815
Epoch [1/3], Step [357/1618], Loss: 2.8577, Perplexity: 17.4215
Epoch [1/3], Step [358/1618], Loss: 2.7639, Perplexity: 15.8617
Epoch [1/3], Step [359/1618], Loss: 2.8112, Perplexity: 16.6291
Epoch [1/3], Step [360/1618], Loss: 2.7788, Perplexity: 16.0999
Epoch [1/3], Step [361/1618], Loss: 2.9763, Perplexity: 19.6160
Epoch [1/3], Step [362/1618], Loss: 2.7629, Perplexity: 15.8458
Epoch [1/3], Step [363/1618], Loss: 3.0105, Perplexity: 20.2969
Epoch [1/3], Step [364/1618], Loss: 2.7444, Perplexity: 15.5553
Epoch [1/3], Step [365/1618], Loss: 2.7772, Perplexity: 16.0733
Epoch [1/3], Step [366/1618], Loss: 3.1874, Perplexity: 24.2251
Epoch [1/3], Step [367/1618], Loss: 4.2787, Perplexity: 72.1486
Epoch [1/3], Step [368/1618], Loss: 2.7874, Perplexity: 16.2387
Epoch [1/3], Step [369/1618], Loss: 3.2200, Perplexity: 25.0269
Epoch [1/3], Step [370/1618], Loss: 2.8626, Perplexity: 17.5070
Epoch [1/3], Step [371/1618], Loss: 2.7443, Perplexity: 15.5545
Epoch [1/3], Step [372/1618], Loss: 2.7206, Perplexity: 15.1892
Epoch [1/3], Step [373/1618], Loss: 3.0707, Perplexity: 21.5560
Epoch [1/3], Step [374/1618], Loss: 2.7181, Perplexity: 15.1522
Epoch [1/3], Step [375/1618], Loss: 2.7735, Perplexity: 16.0149
Epoch [1/3], Step [376/1618], Loss: 2.9011, Perplexity: 18.1940
Epoch [1/3], Step [377/1618], Loss: 2.7412, Perplexity: 15.5053
Epoch [1/3], Step [378/1618], Loss: 2.8437, Perplexity: 17.1786
Epoch [1/3], Step [379/1618], Loss: 2.8988, Perplexity: 18.1518
Epoch [1/3], Step [380/1618], Loss: 2.6874, Perplexity: 14.6936
Epoch [1/3], Step [381/1618], Loss: 2.8988, Perplexity: 18.1526
Epoch [1/3], Step [382/1618], Loss: 2.7498, Perplexity: 15.6401
Epoch [1/3], Step [383/1618], Loss: 2.7997, Perplexity: 16.4390
Epoch [1/3], Step [384/1618], Loss: 2.8529, Perplexity: 17.3375
Epoch [1/3], Step [385/1618], Loss: 2.8540, Perplexity: 17.3564
Epoch [1/3], Step [386/1618], Loss: 2.7766, Perplexity: 16.0637
Epoch [1/3], Step [387/1618], Loss: 2.7702, Perplexity: 15.9619
Epoch [1/3], Step [388/1618], Loss: 2.7248, Perplexity: 15.2537
Epoch [1/3], Step [389/1618], Loss: 2.8218, Perplexity: 16.8074
Epoch [1/3], Step [390/1618], Loss: 2.8446, Perplexity: 17.1945
Epoch [1/3], Step [391/1618], Loss: 3.1273, Perplexity: 22.8121
Epoch [1/3], Step [392/1618], Loss: 2.6834, Perplexity: 14.6353
Epoch [1/3], Step [393/1618], Loss: 2.5775, Perplexity: 13.1644
Epoch [1/3], Step [394/1618], Loss: 2.7193, Perplexity: 15.1693
Epoch [1/3], Step [395/1618], Loss: 2.9360, Perplexity: 18.8401
Epoch [1/3], Step [396/1618], Loss: 2.6518, Perplexity: 14.1789
Epoch [1/3], Step [397/1618], Loss: 2.9252, Perplexity: 18.6376
Epoch [1/3], Step [398/1618], Loss: 2.6095, Perplexity: 13.5926
Epoch [1/3], Step [399/1618], Loss: 2.6920, Perplexity: 14.7616
Epoch [1/3], Step [400/1618], Loss: 2.7176, Perplexity: 15.1440
Epoch [1/3], Step [401/1618], Loss: 2.8500, Perplexity: 17.2878
Epoch [1/3], Step [402/1618], Loss: 2.6482, Perplexity: 14.1284
Epoch [1/3], Step [403/1618], Loss: 2.7521, Perplexity: 15.6756
Epoch [1/3], Step [404/1618], Loss: 2.9308, Perplexity: 18.7432
Epoch [1/3], Step [405/1618], Loss: 2.8245, Perplexity: 16.8529
Epoch [1/3], Step [406/1618], Loss: 2.8721, Perplexity: 17.6733
Epoch [1/3], Step [407/1618], Loss: 2.9410, Perplexity: 18.9351
Epoch [1/3], Step [408/1618], Loss: 2.7228, Perplexity: 15.2226
Epoch [1/3], Step [409/1618], Loss: 3.1205, Perplexity: 22.6573
Epoch [1/3], Step [410/1618], Loss: 2.8230, Perplexity: 16.8275
Epoch [1/3], Step [411/1618], Loss: 2.7806, Perplexity: 16.1286
Epoch [1/3], Step [412/1618], Loss: 3.1601, Perplexity: 23.5728
Epoch [1/3], Step [413/1618], Loss: 2.6799, Perplexity: 14.5840
Epoch [1/3], Step [414/1618], Loss: 2.8167, Perplexity: 16.7211
Epoch [1/3], Step [415/1618], Loss: 2.8187, Perplexity: 16.7550
Epoch [1/3], Step [416/1618], Loss: 2.8418, Perplexity: 17.1458
Epoch [1/3], Step [417/1618], Loss: 2.9052, Perplexity: 18.2691
Epoch [1/3], Step [418/1618], Loss: 2.8159, Perplexity: 16.7089
Epoch [1/3], Step [419/1618], Loss: 2.7374, Perplexity: 15.4468
Epoch [1/3], Step [420/1618], Loss: 2.7439, Perplexity: 15.5472
Epoch [1/3], Step [421/1618], Loss: 2.8269, Perplexity: 16.8926
Epoch [1/3], Step [422/1618], Loss: 2.7688, Perplexity: 15.9400
Epoch [1/3], Step [423/1618], Loss: 2.6956, Perplexity: 14.8138
Epoch [1/3], Step [424/1618], Loss: 2.7594, Perplexity: 15.7906
Epoch [1/3], Step [425/1618], Loss: 2.7507, Perplexity: 15.6542
Epoch [1/3], Step [426/1618], Loss: 2.7069, Perplexity: 14.9830
Epoch [1/3], Step [427/1618], Loss: 2.6984, Perplexity: 14.8557
Epoch [1/3], Step [428/1618], Loss: 2.7807, Perplexity: 16.1311
Epoch [1/3], Step [429/1618], Loss: 2.6478, Perplexity: 14.1233
Epoch [1/3], Step [430/1618], Loss: 2.6835, Perplexity: 14.6365
Epoch [1/3], Step [431/1618], Loss: 2.5857, Perplexity: 13.2727
Epoch [1/3], Step [432/1618], Loss: 3.0875, Perplexity: 21.9227
Epoch [1/3], Step [433/1618], Loss: 2.6885, Perplexity: 14.7101
Epoch [1/3], Step [434/1618], Loss: 2.6242, Perplexity: 13.7938
Epoch [1/3], Step [435/1618], Loss: 2.6394, Perplexity: 14.0043
Epoch [1/3], Step [436/1618], Loss: 2.6183, Perplexity: 13.7125
Epoch [1/3], Step [437/1618], Loss: 2.4771, Perplexity: 11.9062
Epoch [1/3], Step [438/1618], Loss: 2.7179, Perplexity: 15.1487
Epoch [1/3], Step [439/1618], Loss: 2.6494, Perplexity: 14.1451
Epoch [1/3], Step [440/1618], Loss: 2.9712, Perplexity: 19.5157
Epoch [1/3], Step [441/1618], Loss: 2.6156, Perplexity: 13.6748
Epoch [1/3], Step [442/1618], Loss: 2.7938, Perplexity: 16.3426
Epoch [1/3], Step [443/1618], Loss: 5.6128, Perplexity: 273.9215
Epoch [1/3], Step [444/1618], Loss: 2.7351, Perplexity: 15.4117
Epoch [1/3], Step [445/1618], Loss: 3.0802, Perplexity: 21.7620
Epoch [1/3], Step [446/1618], Loss: 2.8205, Perplexity: 16.7845
Epoch [1/3], Step [447/1618], Loss: 2.6794, Perplexity: 14.5761
Epoch [1/3], Step [448/1618], Loss: 2.7288, Perplexity: 15.3142
Epoch [1/3], Step [449/1618], Loss: 2.7778, Perplexity: 16.0835
Epoch [1/3], Step [450/1618], Loss: 2.6102, Perplexity: 13.6019
Epoch [1/3], Step [451/1618], Loss: 2.7050, Perplexity: 14.9548
Epoch [1/3], Step [452/1618], Loss: 2.8432, Perplexity: 17.1702
Epoch [1/3], Step [453/1618], Loss: 2.7880, Perplexity: 16.2487
Epoch [1/3], Step [454/1618], Loss: 3.0051, Perplexity: 20.1879
Epoch [1/3], Step [455/1618], Loss: 2.7242, Perplexity: 15.2446
Epoch [1/3], Step [456/1618], Loss: 2.7907, Perplexity: 16.2920
Epoch [1/3], Step [457/1618], Loss: 2.7832, Perplexity: 16.1706
Epoch [1/3], Step [458/1618], Loss: 2.7678, Perplexity: 15.9237
Epoch [1/3], Step [459/1618], Loss: 2.6130, Perplexity: 13.6404
Epoch [1/3], Step [460/1618], Loss: 2.6404, Perplexity: 14.0191
Epoch [1/3], Step [461/1618], Loss: 2.7740, Perplexity: 16.0230
Epoch [1/3], Step [462/1618], Loss: 2.9006, Perplexity: 18.1854
Epoch [1/3], Step [463/1618], Loss: 2.7742, Perplexity: 16.0251
Epoch [1/3], Step [464/1618], Loss: 2.6062, Perplexity: 13.5471
Epoch [1/3], Step [465/1618], Loss: 2.8401, Perplexity: 17.1168
Epoch [1/3], Step [466/1618], Loss: 2.5807, Perplexity: 13.2058
Epoch [1/3], Step [467/1618], Loss: 2.6125, Perplexity: 13.6335
Epoch [1/3], Step [468/1618], Loss: 2.6853, Perplexity: 14.6628
Epoch [1/3], Step [469/1618], Loss: 2.6845, Perplexity: 14.6506
Epoch [1/3], Step [470/1618], Loss: 2.6548, Perplexity: 14.2215
Epoch [1/3], Step [471/1618], Loss: 2.6830, Perplexity: 14.6293
Epoch [1/3], Step [472/1618], Loss: 2.6474, Perplexity: 14.1172
Epoch [1/3], Step [473/1618], Loss: 2.7413, Perplexity: 15.5072
Epoch [1/3], Step [474/1618], Loss: 2.7202, Perplexity: 15.1840
Epoch [1/3], Step [475/1618], Loss: 3.0888, Perplexity: 21.9501
Epoch [1/3], Step [476/1618], Loss: 2.6708, Perplexity: 14.4510
Epoch [1/3], Step [477/1618], Loss: 2.6308, Perplexity: 13.8850
Epoch [1/3], Step [478/1618], Loss: 3.3635, Perplexity: 28.8904
Epoch [1/3], Step [479/1618], Loss: 2.6089, Perplexity: 13.5839
Epoch [1/3], Step [480/1618], Loss: 2.4950, Perplexity: 12.1214
Epoch [1/3], Step [481/1618], Loss: 2.8581, Perplexity: 17.4279
Epoch [1/3], Step [482/1618], Loss: 2.7549, Perplexity: 15.7190
Epoch [1/3], Step [483/1618], Loss: 2.7329, Perplexity: 15.3782
Epoch [1/3], Step [484/1618], Loss: 2.6947, Perplexity: 14.8017
Epoch [1/3], Step [485/1618], Loss: 2.8736, Perplexity: 17.7005
Epoch [1/3], Step [486/1618], Loss: 2.6272, Perplexity: 13.8346
Epoch [1/3], Step [487/1618], Loss: 2.7211, Perplexity: 15.1978
Epoch [1/3], Step [488/1618], Loss: 2.5790, Perplexity: 13.1836
Epoch [1/3], Step [489/1618], Loss: 2.7283, Perplexity: 15.3061
Epoch [1/3], Step [490/1618], Loss: 3.1733, Perplexity: 23.8860
Epoch [1/3], Step [491/1618], Loss: 2.8766, Perplexity: 17.7534
Epoch [1/3], Step [492/1618], Loss: 2.5995, Perplexity: 13.4576
Epoch [1/3], Step [493/1618], Loss: 3.3354, Perplexity: 28.0903
Epoch [1/3], Step [494/1618], Loss: 3.0781, Perplexity: 21.7162
Epoch [1/3], Step [495/1618], Loss: 2.6534, Perplexity: 14.2024
Epoch [1/3], Step [496/1618], Loss: 2.8548, Perplexity: 17.3707
Epoch [1/3], Step [497/1618], Loss: 2.6119, Perplexity: 13.6247
Epoch [1/3], Step [498/1618], Loss: 2.7536, Perplexity: 15.6990
Epoch [1/3], Step [499/1618], Loss: 2.6143, Perplexity: 13.6582
Epoch [1/3], Step [500/1618], Loss: 2.7299, Perplexity: 15.3312
Epoch [1/3], Step [501/1618], Loss: 2.6157, Perplexity: 13.6770
Epoch [1/3], Step [502/1618], Loss: 2.6472, Perplexity: 14.1145
Epoch [1/3], Step [503/1618], Loss: 2.6627, Perplexity: 14.3352
Epoch [1/3], Step [504/1618], Loss: 3.0483, Perplexity: 21.0791
Epoch [1/3], Step [505/1618], Loss: 3.0502, Perplexity: 21.1192
Epoch [1/3], Step [506/1618], Loss: 2.8146, Perplexity: 16.6867
Epoch [1/3], Step [507/1618], Loss: 2.6582, Perplexity: 14.2706
Epoch [1/3], Step [508/1618], Loss: 2.6937, Perplexity: 14.7865
Epoch [1/3], Step [509/1618], Loss: 2.5972, Perplexity: 13.4265
Epoch [1/3], Step [510/1618], Loss: 2.6850, Perplexity: 14.6585
Epoch [1/3], Step [511/1618], Loss: 2.7689, Perplexity: 15.9415
Epoch [1/3], Step [512/1618], Loss: 2.5613, Perplexity: 12.9527
Epoch [1/3], Step [513/1618], Loss: 2.7910, Perplexity: 16.2977
Epoch [1/3], Step [514/1618], Loss: 2.6660, Perplexity: 14.3816
Epoch [1/3], Step [515/1618], Loss: 2.6698, Perplexity: 14.4375
Epoch [1/3], Step [516/1618], Loss: 2.4898, Perplexity: 12.0590
Epoch [1/3], Step [517/1618], Loss: 2.7221, Perplexity: 15.2128
Epoch [1/3], Step [518/1618], Loss: 2.6382, Perplexity: 13.9873
Epoch [1/3], Step [519/1618], Loss: 2.6056, Perplexity: 13.5398
Epoch [1/3], Step [520/1618], Loss: 3.1049, Perplexity: 22.3072
Epoch [1/3], Step [521/1618], Loss: 2.6947, Perplexity: 14.8014
Epoch [1/3], Step [522/1618], Loss: 2.7515, Perplexity: 15.6662
Epoch [1/3], Step [523/1618], Loss: 2.5032, Perplexity: 12.2211
Epoch [1/3], Step [524/1618], Loss: 2.7442, Perplexity: 15.5527
Epoch [1/3], Step [525/1618], Loss: 2.6311, Perplexity: 13.8887
Epoch [1/3], Step [526/1618], Loss: 2.4929, Perplexity: 12.0963
Epoch [1/3], Step [527/1618], Loss: 2.6888, Perplexity: 14.7135
Epoch [1/3], Step [528/1618], Loss: 2.7174, Perplexity: 15.1409
Epoch [1/3], Step [529/1618], Loss: 2.6200, Perplexity: 13.7358
Epoch [1/3], Step [530/1618], Loss: 2.6414, Perplexity: 14.0326
Epoch [1/3], Step [531/1618], Loss: 2.6385, Perplexity: 13.9917
Epoch [1/3], Step [532/1618], Loss: 2.6164, Perplexity: 13.6859
Epoch [1/3], Step [533/1618], Loss: 2.5841, Perplexity: 13.2514
Epoch [1/3], Step [534/1618], Loss: 2.4817, Perplexity: 11.9613
Epoch [1/3], Step [535/1618], Loss: 2.5311, Perplexity: 12.5678
Epoch [1/3], Step [536/1618], Loss: 2.6626, Perplexity: 14.3341
Epoch [1/3], Step [537/1618], Loss: 2.4885, Perplexity: 12.0435
Epoch [1/3], Step [538/1618], Loss: 2.5135, Perplexity: 12.3482
Epoch [1/3], Step [539/1618], Loss: 2.5351, Perplexity: 12.6181
Epoch [1/3], Step [540/1618], Loss: 3.1952, Perplexity: 24.4158
Epoch [1/3], Step [541/1618], Loss: 3.0565, Perplexity: 21.2521
Epoch [1/3], Step [542/1618], Loss: 2.6610, Perplexity: 14.3110
Epoch [1/3], Step [543/1618], Loss: 2.6506, Perplexity: 14.1623
Epoch [1/3], Step [544/1618], Loss: 2.6239, Perplexity: 13.7895
Epoch [1/3], Step [545/1618], Loss: 2.6219, Perplexity: 13.7621
Epoch [1/3], Step [546/1618], Loss: 2.5844, Perplexity: 13.2550
Epoch [1/3], Step [547/1618], Loss: 2.6856, Perplexity: 14.6663
Epoch [1/3], Step [548/1618], Loss: 2.5444, Perplexity: 12.7355
Epoch [1/3], Step [549/1618], Loss: 2.5297, Perplexity: 12.5502
Epoch [1/3], Step [550/1618], Loss: 2.5469, Perplexity: 12.7676
Epoch [1/3], Step [551/1618], Loss: 2.5507, Perplexity: 12.8162
Epoch [1/3], Step [552/1618], Loss: 3.2173, Perplexity: 24.9609
Epoch [1/3], Step [553/1618], Loss: 2.6807, Perplexity: 14.5956
Epoch [1/3], Step [554/1618], Loss: 2.6718, Perplexity: 14.4661
Epoch [1/3], Step [555/1618], Loss: 2.7687, Perplexity: 15.9375
Epoch [1/3], Step [556/1618], Loss: 2.6613, Perplexity: 14.3146
Epoch [1/3], Step [557/1618], Loss: 2.5403, Perplexity: 12.6839
Epoch [1/3], Step [558/1618], Loss: 2.4520, Perplexity: 11.6117
Epoch [1/3], Step [559/1618], Loss: 2.4537, Perplexity: 11.6312
Epoch [1/3], Step [560/1618], Loss: 2.5746, Perplexity: 13.1263
Epoch [1/3], Step [561/1618], Loss: 2.5035, Perplexity: 12.2252
Epoch [1/3], Step [562/1618], Loss: 2.7879, Perplexity: 16.2474
Epoch [1/3], Step [563/1618], Loss: 2.8787, Perplexity: 17.7920
Epoch [1/3], Step [564/1618], Loss: 2.6195, Perplexity: 13.7285
Epoch [1/3], Step [565/1618], Loss: 2.6700, Perplexity: 14.4406
Epoch [1/3], Step [566/1618], Loss: 2.7500, Perplexity: 15.6429
Epoch [1/3], Step [567/1618], Loss: 2.6601, Perplexity: 14.2984
Epoch [1/3], Step [568/1618], Loss: 2.5645, Perplexity: 12.9947
Epoch [1/3], Step [569/1618], Loss: 2.6516, Perplexity: 14.1770
Epoch [1/3], Step [570/1618], Loss: 2.8697, Perplexity: 17.6317
Epoch [1/3], Step [571/1618], Loss: 2.5282, Perplexity: 12.5305
Epoch [1/3], Step [572/1618], Loss: 2.5069, Perplexity: 12.2664
Epoch [1/3], Step [573/1618], Loss: 3.3589, Perplexity: 28.7566
Epoch [1/3], Step [574/1618], Loss: 2.7910, Perplexity: 16.2974
Epoch [1/3], Step [575/1618], Loss: 2.6542, Perplexity: 14.2139
Epoch [1/3], Step [576/1618], Loss: 2.6060, Perplexity: 13.5448
Epoch [1/3], Step [577/1618], Loss: 2.5530, Perplexity: 12.8454
Epoch [1/3], Step [578/1618], Loss: 2.5660, Perplexity: 13.0137
Epoch [1/3], Step [579/1618], Loss: 2.6866, Perplexity: 14.6821
Epoch [1/3], Step [580/1618], Loss: 2.6054, Perplexity: 13.5373
Epoch [1/3], Step [581/1618], Loss: 2.5468, Perplexity: 12.7658
Epoch [1/3], Step [582/1618], Loss: 2.6884, Perplexity: 14.7077
Epoch [1/3], Step [583/1618], Loss: 2.6901, Perplexity: 14.7325
Epoch [1/3], Step [584/1618], Loss: 2.5084, Perplexity: 12.2857
Epoch [1/3], Step [585/1618], Loss: 2.5594, Perplexity: 12.9282
Epoch [1/3], Step [586/1618], Loss: 2.5665, Perplexity: 13.0204
Epoch [1/3], Step [587/1618], Loss: 2.4545, Perplexity: 11.6409
Epoch [1/3], Step [588/1618], Loss: 2.5424, Perplexity: 12.7098
Epoch [1/3], Step [589/1618], Loss: 2.7278, Perplexity: 15.2995
Epoch [1/3], Step [590/1618], Loss: 2.9217, Perplexity: 18.5733
Epoch [1/3], Step [591/1618], Loss: 2.5145, Perplexity: 12.3603
Epoch [1/3], Step [592/1618], Loss: 2.5014, Perplexity: 12.1997
Epoch [1/3], Step [593/1618], Loss: 2.4174, Perplexity: 11.2167
Epoch [1/3], Step [594/1618], Loss: 2.5015, Perplexity: 12.2007
Epoch [1/3], Step [595/1618], Loss: 2.5793, Perplexity: 13.1880
Epoch [1/3], Step [596/1618], Loss: 2.4875, Perplexity: 12.0314
Epoch [1/3], Step [597/1618], Loss: 2.5134, Perplexity: 12.3474
Epoch [1/3], Step [598/1618], Loss: 2.4723, Perplexity: 11.8492
Epoch [1/3], Step [599/1618], Loss: 2.5179, Perplexity: 12.4023
Epoch [1/3], Step [600/1618], Loss: 2.4966, Perplexity: 12.1407
Epoch [1/3], Step [601/1618], Loss: 2.9796, Perplexity: 19.6798
Epoch [1/3], Step [602/1618], Loss: 2.5423, Perplexity: 12.7087
Epoch [1/3], Step [603/1618], Loss: 2.5887, Perplexity: 13.3128
Epoch [1/3], Step [604/1618], Loss: 2.4708, Perplexity: 11.8313
Epoch [1/3], Step [605/1618], Loss: 2.4053, Perplexity: 11.0818
Epoch [1/3], Step [606/1618], Loss: 2.5001, Perplexity: 12.1834
Epoch [1/3], Step [607/1618], Loss: 2.5516, Perplexity: 12.8279
Epoch [1/3], Step [608/1618], Loss: 2.4359, Perplexity: 11.4267
Epoch [1/3], Step [609/1618], Loss: 2.5022, Perplexity: 12.2090
Epoch [1/3], Step [610/1618], Loss: 3.5882, Perplexity: 36.1672
Epoch [1/3], Step [611/1618], Loss: 2.4323, Perplexity: 11.3854
Epoch [1/3], Step [612/1618], Loss: 2.5174, Perplexity: 12.3966
Epoch [1/3], Step [613/1618], Loss: 2.5713, Perplexity: 13.0833
Epoch [1/3], Step [614/1618], Loss: 2.6239, Perplexity: 13.7890
Epoch [1/3], Step [615/1618], Loss: 2.4376, Perplexity: 11.4452
Epoch [1/3], Step [616/1618], Loss: 2.5237, Perplexity: 12.4742
Epoch [1/3], Step [617/1618], Loss: 2.6698, Perplexity: 14.4377
Epoch [1/3], Step [618/1618], Loss: 2.5304, Perplexity: 12.5588
Epoch [1/3], Step [619/1618], Loss: 2.8805, Perplexity: 17.8227
Epoch [1/3], Step [620/1618], Loss: 2.5830, Perplexity: 13.2370
Epoch [1/3], Step [621/1618], Loss: 2.5021, Perplexity: 12.2081
Epoch [1/3], Step [622/1618], Loss: 2.4366, Perplexity: 11.4337
Epoch [1/3], Step [623/1618], Loss: 2.5476, Perplexity: 12.7761
Epoch [1/3], Step [624/1618], Loss: 2.4488, Perplexity: 11.5745
Epoch [1/3], Step [625/1618], Loss: 2.3924, Perplexity: 10.9396
Epoch [1/3], Step [626/1618], Loss: 2.5134, Perplexity: 12.3465
Epoch [1/3], Step [627/1618], Loss: 2.7300, Perplexity: 15.3323
Epoch [1/3], Step [628/1618], Loss: 2.5124, Perplexity: 12.3342
Epoch [1/3], Step [629/1618], Loss: 2.5214, Perplexity: 12.4460
Epoch [1/3], Step [630/1618], Loss: 2.4617, Perplexity: 11.7242
Epoch [1/3], Step [631/1618], Loss: 2.4229, Perplexity: 11.2786
Epoch [1/3], Step [632/1618], Loss: 2.4860, Perplexity: 12.0134
Epoch [1/3], Step [633/1618], Loss: 2.7210, Perplexity: 15.1956
Epoch [1/3], Step [634/1618], Loss: 2.8470, Perplexity: 17.2368
Epoch [1/3], Step [635/1618], Loss: 2.5725, Perplexity: 13.0990
Epoch [1/3], Step [636/1618], Loss: 2.5769, Perplexity: 13.1569
Epoch [1/3], Step [637/1618], Loss: 2.8218, Perplexity: 16.8077
Epoch [1/3], Step [638/1618], Loss: 2.5185, Perplexity: 12.4105
Epoch [1/3], Step [639/1618], Loss: 2.9664, Perplexity: 19.4211
Epoch [1/3], Step [640/1618], Loss: 2.8349, Perplexity: 17.0284
Epoch [1/3], Step [641/1618], Loss: 2.5334, Perplexity: 12.5958
Epoch [1/3], Step [642/1618], Loss: 2.9077, Perplexity: 18.3146
Epoch [1/3], Step [643/1618], Loss: 2.5135, Perplexity: 12.3485
Epoch [1/3], Step [644/1618], Loss: 2.8973, Perplexity: 18.1252
Epoch [1/3], Step [645/1618], Loss: 2.7155, Perplexity: 15.1120
Epoch [1/3], Step [646/1618], Loss: 2.6439, Perplexity: 14.0683
Epoch [1/3], Step [647/1618], Loss: 2.4906, Perplexity: 12.0685
Epoch [1/3], Step [648/1618], Loss: 2.5081, Perplexity: 12.2813
Epoch [1/3], Step [649/1618], Loss: 3.1185, Perplexity: 22.6135
Epoch [1/3], Step [650/1618], Loss: 2.6671, Perplexity: 14.3979
Epoch [1/3], Step [651/1618], Loss: 2.5247, Perplexity: 12.4870
Epoch [1/3], Step [652/1618], Loss: 2.5235, Perplexity: 12.4724
Epoch [1/3], Step [653/1618], Loss: 2.5643, Perplexity: 12.9920
Epoch [1/3], Step [654/1618], Loss: 2.4073, Perplexity: 11.1041
Epoch [1/3], Step [655/1618], Loss: 2.4488, Perplexity: 11.5747
Epoch [1/3], Step [656/1618], Loss: 2.5267, Perplexity: 12.5117
Epoch [1/3], Step [657/1618], Loss: 2.4140, Perplexity: 11.1780
Epoch [1/3], Step [658/1618], Loss: 2.5006, Perplexity: 12.1895
Epoch [1/3], Step [659/1618], Loss: 2.4418, Perplexity: 11.4941
Epoch [1/3], Step [660/1618], Loss: 2.6440, Perplexity: 14.0698
Epoch [1/3], Step [661/1618], Loss: 2.4931, Perplexity: 12.0983
Epoch [1/3], Step [662/1618], Loss: 2.4970, Perplexity: 12.1462
Epoch [1/3], Step [663/1618], Loss: 2.4822, Perplexity: 11.9679
Epoch [1/3], Step [664/1618], Loss: 2.3966, Perplexity: 10.9858
Epoch [1/3], Step [665/1618], Loss: 2.4257, Perplexity: 11.3102
Epoch [1/3], Step [666/1618], Loss: 2.4565, Perplexity: 11.6637
Epoch [1/3], Step [667/1618], Loss: 3.2667, Perplexity: 26.2250
Epoch [1/3], Step [668/1618], Loss: 2.4245, Perplexity: 11.2963
Epoch [1/3], Step [669/1618], Loss: 2.5945, Perplexity: 13.3903
Epoch [1/3], Step [670/1618], Loss: 2.9003, Perplexity: 18.1800
Epoch [1/3], Step [671/1618], Loss: 2.4655, Perplexity: 11.7690
Epoch [1/3], Step [672/1618], Loss: 2.5041, Perplexity: 12.2320
Epoch [1/3], Step [673/1618], Loss: 2.4271, Perplexity: 11.3264
Epoch [1/3], Step [674/1618], Loss: 2.4460, Perplexity: 11.5422
Epoch [1/3], Step [675/1618], Loss: 2.5581, Perplexity: 12.9107
Epoch [1/3], Step [676/1618], Loss: 2.5009, Perplexity: 12.1940
Epoch [1/3], Step [677/1618], Loss: 2.5004, Perplexity: 12.1878
Epoch [1/3], Step [678/1618], Loss: 2.7256, Perplexity: 15.2661
Epoch [1/3], Step [679/1618], Loss: 2.5489, Perplexity: 12.7925
Epoch [1/3], Step [680/1618], Loss: 2.6001, Perplexity: 13.4650
Epoch [1/3], Step [681/1618], Loss: 2.5009, Perplexity: 12.1932
Epoch [1/3], Step [682/1618], Loss: 2.4058, Perplexity: 11.0878
Epoch [1/3], Step [683/1618], Loss: 2.5405, Perplexity: 12.6860
Epoch [1/3], Step [684/1618], Loss: 2.4399, Perplexity: 11.4713
Epoch [1/3], Step [685/1618], Loss: 2.4915, Perplexity: 12.0796
Epoch [1/3], Step [686/1618], Loss: 2.2450, Perplexity: 9.4406
Epoch [1/3], Step [687/1618], Loss: 2.6747, Perplexity: 14.5077
Epoch [1/3], Step [688/1618], Loss: 2.3935, Perplexity: 10.9517
Epoch [1/3], Step [689/1618], Loss: 2.5126, Perplexity: 12.3364
Epoch [1/3], Step [690/1618], Loss: 2.4989, Perplexity: 12.1686
Epoch [1/3], Step [691/1618], Loss: 2.4731, Perplexity: 11.8587
Epoch [1/3], Step [692/1618], Loss: 2.5559, Perplexity: 12.8830
Epoch [1/3], Step [693/1618], Loss: 3.0475, Perplexity: 21.0617
Epoch [1/3], Step [694/1618], Loss: 2.5244, Perplexity: 12.4834
Epoch [1/3], Step [695/1618], Loss: 2.6307, Perplexity: 13.8834
Epoch [1/3], Step [696/1618], Loss: 2.4034, Perplexity: 11.0603
Epoch [1/3], Step [697/1618], Loss: 2.5697, Perplexity: 13.0625
Epoch [1/3], Step [698/1618], Loss: 2.3734, Perplexity: 10.7337
Epoch [1/3], Step [699/1618], Loss: 2.5396, Perplexity: 12.6747
Epoch [1/3], Step [700/1618], Loss: 2.4923, Perplexity: 12.0888
Epoch [1/3], Step [701/1618], Loss: 2.4069, Perplexity: 11.0998
Epoch [1/3], Step [702/1618], Loss: 2.5696, Perplexity: 13.0600
Epoch [1/3], Step [703/1618], Loss: 2.6005, Perplexity: 13.4707
Epoch [1/3], Step [704/1618], Loss: 2.3629, Perplexity: 10.6216
Epoch [1/3], Step [705/1618], Loss: 2.8688, Perplexity: 17.6155
Epoch [1/3], Step [706/1618], Loss: 2.4404, Perplexity: 11.4779
Epoch [1/3], Step [707/1618], Loss: 2.3301, Perplexity: 10.2795
Epoch [1/3], Step [708/1618], Loss: 2.5777, Perplexity: 13.1671
Epoch [1/3], Step [709/1618], Loss: 2.3288, Perplexity: 10.2652
Epoch [1/3], Step [710/1618], Loss: 3.6195, Perplexity: 37.3202
Epoch [1/3], Step [711/1618], Loss: 2.7462, Perplexity: 15.5836
Epoch [1/3], Step [712/1618], Loss: 2.4411, Perplexity: 11.4855
Epoch [1/3], Step [713/1618], Loss: 2.4570, Perplexity: 11.6695
Epoch [1/3], Step [714/1618], Loss: 2.5931, Perplexity: 13.3707
Epoch [1/3], Step [715/1618], Loss: 2.3727, Perplexity: 10.7263
Epoch [1/3], Step [716/1618], Loss: 2.4816, Perplexity: 11.9608
Epoch [1/3], Step [717/1618], Loss: 2.6613, Perplexity: 14.3154
Epoch [1/3], Step [718/1618], Loss: 2.5501, Perplexity: 12.8089
Epoch [1/3], Step [719/1618], Loss: 2.5024, Perplexity: 12.2123
Epoch [1/3], Step [720/1618], Loss: 2.3902, Perplexity: 10.9151
Epoch [1/3], Step [721/1618], Loss: 2.5125, Perplexity: 12.3352
Epoch [1/3], Step [722/1618], Loss: 2.7900, Perplexity: 16.2814
Epoch [1/3], Step [723/1618], Loss: 2.6397, Perplexity: 14.0087
Epoch [1/3], Step [724/1618], Loss: 2.4757, Perplexity: 11.8897
Epoch [1/3], Step [725/1618], Loss: 2.5555, Perplexity: 12.8783
Epoch [1/3], Step [726/1618], Loss: 2.4778, Perplexity: 11.9146
Epoch [1/3], Step [727/1618], Loss: 2.5450, Perplexity: 12.7430
Epoch [1/3], Step [728/1618], Loss: 2.9915, Perplexity: 19.9148
Epoch [1/3], Step [729/1618], Loss: 2.3668, Perplexity: 10.6634
Epoch [1/3], Step [730/1618], Loss: 2.7938, Perplexity: 16.3425
Epoch [1/3], Step [731/1618], Loss: 2.5107, Perplexity: 12.3137
Epoch [1/3], Step [732/1618], Loss: 2.4026, Perplexity: 11.0522
Epoch [1/3], Step [733/1618], Loss: 2.5796, Perplexity: 13.1921
Epoch [1/3], Step [734/1618], Loss: 2.5879, Perplexity: 13.3018
Epoch [1/3], Step [735/1618], Loss: 2.6102, Perplexity: 13.6022
Epoch [1/3], Step [736/1618], Loss: 2.4268, Perplexity: 11.3227
Epoch [1/3], Step [737/1618], Loss: 2.5412, Perplexity: 12.6948
Epoch [1/3], Step [738/1618], Loss: 2.4657, Perplexity: 11.7714
Epoch [1/3], Step [739/1618], Loss: 2.4838, Perplexity: 11.9867
Epoch [1/3], Step [740/1618], Loss: 3.2278, Perplexity: 25.2253
Epoch [1/3], Step [741/1618], Loss: 2.4797, Perplexity: 11.9374
Epoch [1/3], Step [742/1618], Loss: 2.4498, Perplexity: 11.5864
Epoch [1/3], Step [743/1618], Loss: 2.4116, Perplexity: 11.1520
Epoch [1/3], Step [744/1618], Loss: 2.8637, Perplexity: 17.5266
Epoch [1/3], Step [745/1618], Loss: 2.3627, Perplexity: 10.6200
Epoch [1/3], Step [746/1618], Loss: 2.4410, Perplexity: 11.4850
Epoch [1/3], Step [747/1618], Loss: 2.7092, Perplexity: 15.0171
Epoch [1/3], Step [748/1618], Loss: 2.4126, Perplexity: 11.1631
Epoch [1/3], Step [749/1618], Loss: 2.4359, Perplexity: 11.4259
Epoch [1/3], Step [750/1618], Loss: 2.5931, Perplexity: 13.3708
Epoch [1/3], Step [751/1618], Loss: 2.5899, Perplexity: 13.3283
Epoch [1/3], Step [752/1618], Loss: 2.4474, Perplexity: 11.5587
Epoch [1/3], Step [753/1618], Loss: 2.4875, Perplexity: 12.0315
Epoch [1/3], Step [754/1618], Loss: 2.4494, Perplexity: 11.5813
Epoch [1/3], Step [755/1618], Loss: 2.8872, Perplexity: 17.9433
Epoch [1/3], Step [756/1618], Loss: 2.3413, Perplexity: 10.3950
Epoch [1/3], Step [757/1618], Loss: 2.5122, Perplexity: 12.3325
Epoch [1/3], Step [758/1618], Loss: 2.5057, Perplexity: 12.2524
Epoch [1/3], Step [759/1618], Loss: 2.7701, Perplexity: 15.9607
Epoch [1/3], Step [760/1618], Loss: 2.4121, Perplexity: 11.1569
Epoch [1/3], Step [761/1618], Loss: 2.3221, Perplexity: 10.1966
Epoch [1/3], Step [762/1618], Loss: 2.3858, Perplexity: 10.8673
Epoch [1/3], Step [763/1618], Loss: 2.4407, Perplexity: 11.4813
Epoch [1/3], Step [764/1618], Loss: 2.6396, Perplexity: 14.0072
Epoch [1/3], Step [765/1618], Loss: 2.4184, Perplexity: 11.2275
Epoch [1/3], Step [766/1618], Loss: 2.4090, Perplexity: 11.1232
Epoch [1/3], Step [767/1618], Loss: 2.4703, Perplexity: 11.8266
Epoch [1/3], Step [768/1618], Loss: 2.3358, Perplexity: 10.3379
Epoch [1/3], Step [769/1618], Loss: 2.6608, Perplexity: 14.3084
Epoch [1/3], Step [770/1618], Loss: 2.4713, Perplexity: 11.8379
Epoch [1/3], Step [771/1618], Loss: 2.4928, Perplexity: 12.0952
Epoch [1/3], Step [772/1618], Loss: 2.4093, Perplexity: 11.1264
Epoch [1/3], Step [773/1618], Loss: 2.4934, Perplexity: 12.1026
Epoch [1/3], Step [774/1618], Loss: 2.6513, Perplexity: 14.1724
Epoch [1/3], Step [775/1618], Loss: 2.8160, Perplexity: 16.7101
Epoch [1/3], Step [776/1618], Loss: 2.3918, Perplexity: 10.9328
Epoch [1/3], Step [777/1618], Loss: 2.5927, Perplexity: 13.3658
Epoch [1/3], Step [778/1618], Loss: 2.4762, Perplexity: 11.8956
Epoch [1/3], Step [779/1618], Loss: 2.4651, Perplexity: 11.7646
Epoch [1/3], Step [780/1618], Loss: 2.2695, Perplexity: 9.6743
Epoch [1/3], Step [781/1618], Loss: 2.4546, Perplexity: 11.6421
Epoch [1/3], Step [782/1618], Loss: 2.3885, Perplexity: 10.8973
Epoch [1/3], Step [783/1618], Loss: 2.4647, Perplexity: 11.7597
Epoch [1/3], Step [784/1618], Loss: 2.4374, Perplexity: 11.4438
Epoch [1/3], Step [785/1618], Loss: 2.4054, Perplexity: 11.0825
Epoch [1/3], Step [786/1618], Loss: 2.6601, Perplexity: 14.2981
Epoch [1/3], Step [787/1618], Loss: 2.4154, Perplexity: 11.1938
Epoch [1/3], Step [788/1618], Loss: 3.3712, Perplexity: 29.1148
Epoch [1/3], Step [789/1618], Loss: 2.6992, Perplexity: 14.8682
Epoch [1/3], Step [790/1618], Loss: 2.3297, Perplexity: 10.2745
Epoch [1/3], Step [791/1618], Loss: 2.3958, Perplexity: 10.9771
Epoch [1/3], Step [792/1618], Loss: 2.5137, Perplexity: 12.3508
Epoch [1/3], Step [793/1618], Loss: 2.4809, Perplexity: 11.9526
Epoch [1/3], Step [794/1618], Loss: 2.4328, Perplexity: 11.3906
Epoch [1/3], Step [795/1618], Loss: 2.4537, Perplexity: 11.6315
Epoch [1/3], Step [796/1618], Loss: 2.3326, Perplexity: 10.3052
Epoch [1/3], Step [797/1618], Loss: 3.0930, Perplexity: 22.0439
Epoch [1/3], Step [798/1618], Loss: 2.7975, Perplexity: 16.4036
Epoch [1/3], Step [799/1618], Loss: 2.5688, Perplexity: 13.0499
Epoch [1/3], Step [800/1618], Loss: 2.4365, Perplexity: 11.4327
Epoch [1/3], Step [801/1618], Loss: 2.3731, Perplexity: 10.7304
Epoch [1/3], Step [802/1618], Loss: 2.5280, Perplexity: 12.5281
Epoch [1/3], Step [803/1618], Loss: 2.4231, Perplexity: 11.2807
Epoch [1/3], Step [804/1618], Loss: 2.5855, Perplexity: 13.2706
Epoch [1/3], Step [805/1618], Loss: 2.7935, Perplexity: 16.3382
Epoch [1/3], Step [806/1618], Loss: 2.4444, Perplexity: 11.5237
Epoch [1/3], Step [807/1618], Loss: 2.5060, Perplexity: 12.2561
Epoch [1/3], Step [808/1618], Loss: 2.4627, Perplexity: 11.7361
Epoch [1/3], Step [809/1618], Loss: 2.4450, Perplexity: 11.5309
Epoch [1/3], Step [810/1618], Loss: 2.3221, Perplexity: 10.1974
Epoch [1/3], Step [811/1618], Loss: 2.6983, Perplexity: 14.8539
Epoch [1/3], Step [812/1618], Loss: 2.4670, Perplexity: 11.7867
Epoch [1/3], Step [813/1618], Loss: 2.4193, Perplexity: 11.2381
Epoch [1/3], Step [814/1618], Loss: 2.5652, Perplexity: 13.0038
Epoch [1/3], Step [815/1618], Loss: 2.4694, Perplexity: 11.8153
Epoch [1/3], Step [816/1618], Loss: 2.6646, Perplexity: 14.3624
Epoch [1/3], Step [817/1618], Loss: 2.4003, Perplexity: 11.0265
Epoch [1/3], Step [818/1618], Loss: 2.3968, Perplexity: 10.9885
Epoch [1/3], Step [819/1618], Loss: 2.4684, Perplexity: 11.8030
Epoch [1/3], Step [820/1618], Loss: 2.6271, Perplexity: 13.8335
Epoch [1/3], Step [821/1618], Loss: 2.5162, Perplexity: 12.3811
Epoch [1/3], Step [822/1618], Loss: 2.4639, Perplexity: 11.7511
Epoch [1/3], Step [823/1618], Loss: 2.2969, Perplexity: 9.9429
Epoch [1/3], Step [824/1618], Loss: 2.4200, Perplexity: 11.2460
Epoch [1/3], Step [825/1618], Loss: 2.4321, Perplexity: 11.3825
Epoch [1/3], Step [826/1618], Loss: 2.7682, Perplexity: 15.9292
Epoch [1/3], Step [827/1618], Loss: 2.4009, Perplexity: 11.0335
Epoch [1/3], Step [828/1618], Loss: 2.7074, Perplexity: 14.9901
Epoch [1/3], Step [829/1618], Loss: 2.4508, Perplexity: 11.5982
Epoch [1/3], Step [830/1618], Loss: 2.4274, Perplexity: 11.3296
Epoch [1/3], Step [831/1618], Loss: 2.3289, Perplexity: 10.2668
Epoch [1/3], Step [832/1618], Loss: 2.7141, Perplexity: 15.0907
Epoch [1/3], Step [833/1618], Loss: 2.3793, Perplexity: 10.7972
Epoch [1/3], Step [834/1618], Loss: 2.3331, Perplexity: 10.3095
Epoch [1/3], Step [835/1618], Loss: 2.5665, Perplexity: 13.0196
Epoch [1/3], Step [836/1618], Loss: 2.5620, Perplexity: 12.9624
Epoch [1/3], Step [837/1618], Loss: 2.6517, Perplexity: 14.1775
Epoch [1/3], Step [838/1618], Loss: 2.6364, Perplexity: 13.9630
Epoch [1/3], Step [839/1618], Loss: 2.4410, Perplexity: 11.4850
Epoch [1/3], Step [840/1618], Loss: 2.3752, Perplexity: 10.7530
Epoch [1/3], Step [841/1618], Loss: 2.7181, Perplexity: 15.1518
Epoch [1/3], Step [842/1618], Loss: 2.4523, Perplexity: 11.6151
Epoch [1/3], Step [843/1618], Loss: 2.4326, Perplexity: 11.3883
Epoch [1/3], Step [844/1618], Loss: 2.6401, Perplexity: 14.0147
Epoch [1/3], Step [845/1618], Loss: 2.3812, Perplexity: 10.8174
Epoch [1/3], Step [846/1618], Loss: 2.8261, Perplexity: 16.8788
Epoch [1/3], Step [847/1618], Loss: 2.4589, Perplexity: 11.6914
Epoch [1/3], Step [848/1618], Loss: 2.5290, Perplexity: 12.5405
Epoch [1/3], Step [849/1618], Loss: 2.4253, Perplexity: 11.3060
Epoch [1/3], Step [850/1618], Loss: 2.4108, Perplexity: 11.1428
Epoch [1/3], Step [851/1618], Loss: 2.3952, Perplexity: 10.9702
Epoch [1/3], Step [852/1618], Loss: 2.3749, Perplexity: 10.7501
Epoch [1/3], Step [853/1618], Loss: 2.7084, Perplexity: 15.0048
Epoch [1/3], Step [854/1618], Loss: 2.5406, Perplexity: 12.6870
Epoch [1/3], Step [855/1618], Loss: 2.5150, Perplexity: 12.3667
Epoch [1/3], Step [856/1618], Loss: 2.4872, Perplexity: 12.0273
Epoch [1/3], Step [857/1618], Loss: 2.6019, Perplexity: 13.4892
Epoch [1/3], Step [858/1618], Loss: 3.2055, Perplexity: 24.6688
Epoch [1/3], Step [859/1618], Loss: 2.5094, Perplexity: 12.2975
Epoch [1/3], Step [860/1618], Loss: 2.3939, Perplexity: 10.9557
Epoch [1/3], Step [861/1618], Loss: 2.3923, Perplexity: 10.9382
Epoch [1/3], Step [862/1618], Loss: 2.3515, Perplexity: 10.5012
Epoch [1/3], Step [863/1618], Loss: 2.3910, Perplexity: 10.9247
Epoch [1/3], Step [864/1618], Loss: 2.3028, Perplexity: 10.0021
Epoch [1/3], Step [865/1618], Loss: 2.3138, Perplexity: 10.1128
Epoch [1/3], Step [866/1618], Loss: 2.3474, Perplexity: 10.4579
Epoch [1/3], Step [867/1618], Loss: 2.5695, Perplexity: 13.0589
Epoch [1/3], Step [868/1618], Loss: 2.3121, Perplexity: 10.0952
Epoch [1/3], Step [869/1618], Loss: 2.3635, Perplexity: 10.6282
Epoch [1/3], Step [870/1618], Loss: 2.4270, Perplexity: 11.3251
Epoch [1/3], Step [871/1618], Loss: 2.3714, Perplexity: 10.7122
Epoch [1/3], Step [872/1618], Loss: 2.4025, Perplexity: 11.0507
Epoch [1/3], Step [873/1618], Loss: 2.4220, Perplexity: 11.2685
Epoch [1/3], Step [874/1618], Loss: 2.3205, Perplexity: 10.1812
Epoch [1/3], Step [875/1618], Loss: 2.4188, Perplexity: 11.2320
Epoch [1/3], Step [876/1618], Loss: 2.3772, Perplexity: 10.7747
Epoch [1/3], Step [877/1618], Loss: 2.3440, Perplexity: 10.4224
Epoch [1/3], Step [878/1618], Loss: 2.7776, Perplexity: 16.0811
Epoch [1/3], Step [879/1618], Loss: 2.3625, Perplexity: 10.6177
Epoch [1/3], Step [880/1618], Loss: 2.4184, Perplexity: 11.2283
Epoch [1/3], Step [881/1618], Loss: 2.6404, Perplexity: 14.0191
Epoch [1/3], Step [882/1618], Loss: 2.9665, Perplexity: 19.4240
Epoch [1/3], Step [883/1618], Loss: 2.3896, Perplexity: 10.9096
Epoch [1/3], Step [884/1618], Loss: 2.4241, Perplexity: 11.2921
Epoch [1/3], Step [885/1618], Loss: 2.3821, Perplexity: 10.8280
Epoch [1/3], Step [886/1618], Loss: 2.3004, Perplexity: 9.9781
Epoch [1/3], Step [887/1618], Loss: 2.3939, Perplexity: 10.9564
Epoch [1/3], Step [888/1618], Loss: 2.4435, Perplexity: 11.5129
Epoch [1/3], Step [889/1618], Loss: 2.3879, Perplexity: 10.8908
Epoch [1/3], Step [890/1618], Loss: 2.4127, Perplexity: 11.1641
Epoch [1/3], Step [891/1618], Loss: 2.4233, Perplexity: 11.2830
Epoch [1/3], Step [892/1618], Loss: 2.4102, Perplexity: 11.1362
Epoch [1/3], Step [893/1618], Loss: 2.4950, Perplexity: 12.1218
Epoch [1/3], Step [894/1618], Loss: 2.3695, Perplexity: 10.6924
Epoch [1/3], Step [895/1618], Loss: 2.3578, Perplexity: 10.5679
Epoch [1/3], Step [896/1618], Loss: 2.3630, Perplexity: 10.6226
Epoch [1/3], Step [897/1618], Loss: 2.3067, Perplexity: 10.0408
Epoch [1/3], Step [898/1618], Loss: 2.4079, Perplexity: 11.1111
Epoch [1/3], Step [899/1618], Loss: 2.2659, Perplexity: 9.6397
Epoch [1/3], Step [900/1618], Loss: 2.3963, Perplexity: 10.9827
Epoch [1/3], Step [901/1618], Loss: 2.3240, Perplexity: 10.2160
Epoch [1/3], Step [902/1618], Loss: 2.3624, Perplexity: 10.6162
Epoch [1/3], Step [903/1618], Loss: 2.4519, Perplexity: 11.6106
Epoch [1/3], Step [904/1618], Loss: 2.2448, Perplexity: 9.4388
Epoch [1/3], Step [905/1618], Loss: 2.3692, Perplexity: 10.6887
Epoch [1/3], Step [906/1618], Loss: 2.2823, Perplexity: 9.7991
Epoch [1/3], Step [907/1618], Loss: 2.5137, Perplexity: 12.3503
Epoch [1/3], Step [908/1618], Loss: 2.5077, Perplexity: 12.2762
Epoch [1/3], Step [909/1618], Loss: 2.3160, Perplexity: 10.1351
Epoch [1/3], Step [910/1618], Loss: 2.6140, Perplexity: 13.6529
Epoch [1/3], Step [911/1618], Loss: 2.3441, Perplexity: 10.4237
Epoch [1/3], Step [912/1618], Loss: 2.3125, Perplexity: 10.0991
Epoch [1/3], Step [913/1618], Loss: 2.3403, Perplexity: 10.3847
Epoch [1/3], Step [914/1618], Loss: 2.3751, Perplexity: 10.7518
Epoch [1/3], Step [915/1618], Loss: 2.3248, Perplexity: 10.2245
Epoch [1/3], Step [916/1618], Loss: 2.6877, Perplexity: 14.6977
Epoch [1/3], Step [917/1618], Loss: 2.2781, Perplexity: 9.7578
Epoch [1/3], Step [918/1618], Loss: 2.4749, Perplexity: 11.8807
Epoch [1/3], Step [919/1618], Loss: 3.4087, Perplexity: 30.2273
Epoch [1/3], Step [920/1618], Loss: 2.2994, Perplexity: 9.9686
Epoch [1/3], Step [921/1618], Loss: 2.4466, Perplexity: 11.5496
Epoch [1/3], Step [922/1618], Loss: 2.2801, Perplexity: 9.7773
Epoch [1/3], Step [923/1618], Loss: 2.3645, Perplexity: 10.6388
Epoch [1/3], Step [924/1618], Loss: 2.4079, Perplexity: 11.1110
Epoch [1/3], Step [925/1618], Loss: 2.5159, Perplexity: 12.3777
Epoch [1/3], Step [926/1618], Loss: 2.4954, Perplexity: 12.1262
Epoch [1/3], Step [927/1618], Loss: 2.2426, Perplexity: 9.4181
Epoch [1/3], Step [928/1618], Loss: 2.6379, Perplexity: 13.9832
Epoch [1/3], Step [929/1618], Loss: 2.3357, Perplexity: 10.3368
Epoch [1/3], Step [930/1618], Loss: 2.6216, Perplexity: 13.7572
Epoch [1/3], Step [931/1618], Loss: 2.4696, Perplexity: 11.8173
Epoch [1/3], Step [932/1618], Loss: 2.4613, Perplexity: 11.7197
Epoch [1/3], Step [933/1618], Loss: 2.2858, Perplexity: 9.8335
Epoch [1/3], Step [934/1618], Loss: 2.4276, Perplexity: 11.3313
Epoch [1/3], Step [935/1618], Loss: 2.3660, Perplexity: 10.6546
Epoch [1/3], Step [936/1618], Loss: 2.4489, Perplexity: 11.5759
Epoch [1/3], Step [937/1618], Loss: 2.3393, Perplexity: 10.3738
Epoch [1/3], Step [938/1618], Loss: 2.2900, Perplexity: 9.8750
Epoch [1/3], Step [939/1618], Loss: 2.7382, Perplexity: 15.4585
Epoch [1/3], Step [940/1618], Loss: 2.3345, Perplexity: 10.3241
Epoch [1/3], Step [941/1618], Loss: 2.3698, Perplexity: 10.6953
Epoch [1/3], Step [942/1618], Loss: 2.3302, Perplexity: 10.2803
Epoch [1/3], Step [943/1618], Loss: 2.3486, Perplexity: 10.4708
Epoch [1/3], Step [944/1618], Loss: 2.8335, Perplexity: 17.0057
Epoch [1/3], Step [945/1618], Loss: 2.6490, Perplexity: 14.1397
Epoch [1/3], Step [946/1618], Loss: 2.2782, Perplexity: 9.7590
Epoch [1/3], Step [947/1618], Loss: 2.2699, Perplexity: 9.6781
Epoch [1/3], Step [948/1618], Loss: 2.7362, Perplexity: 15.4288
Epoch [1/3], Step [949/1618], Loss: 2.2849, Perplexity: 9.8251
Epoch [1/3], Step [950/1618], Loss: 2.2863, Perplexity: 9.8381
Epoch [1/3], Step [951/1618], Loss: 2.3978, Perplexity: 10.9995
Epoch [1/3], Step [952/1618], Loss: 2.3109, Perplexity: 10.0835
Epoch [1/3], Step [953/1618], Loss: 2.4699, Perplexity: 11.8212
Epoch [1/3], Step [954/1618], Loss: 2.5278, Perplexity: 12.5259
Epoch [1/3], Step [955/1618], Loss: 2.3630, Perplexity: 10.6226
Epoch [1/3], Step [956/1618], Loss: 2.5536, Perplexity: 12.8531
Epoch [1/3], Step [957/1618], Loss: 2.2593, Perplexity: 9.5764
Epoch [1/3], Step [958/1618], Loss: 2.3283, Perplexity: 10.2603
Epoch [1/3], Step [959/1618], Loss: 2.4222, Perplexity: 11.2705
Epoch [1/3], Step [960/1618], Loss: 2.4879, Perplexity: 12.0357
Epoch [1/3], Step [961/1618], Loss: 3.0798, Perplexity: 21.7542
Epoch [1/3], Step [962/1618], Loss: 2.5124, Perplexity: 12.3342
Epoch [1/3], Step [963/1618], Loss: 2.3133, Perplexity: 10.1073
Epoch [1/3], Step [964/1618], Loss: 2.4051, Perplexity: 11.0797
Epoch [1/3], Step [965/1618], Loss: 2.7203, Perplexity: 15.1843
Epoch [1/3], Step [966/1618], Loss: 2.3344, Perplexity: 10.3228
Epoch [1/3], Step [967/1618], Loss: 2.5411, Perplexity: 12.6935
Epoch [1/3], Step [968/1618], Loss: 2.3996, Perplexity: 11.0191
Epoch [1/3], Step [969/1618], Loss: 2.3772, Perplexity: 10.7745
Epoch [1/3], Step [970/1618], Loss: 2.3153, Perplexity: 10.1275
Epoch [1/3], Step [971/1618], Loss: 2.4261, Perplexity: 11.3143
Epoch [1/3], Step [972/1618], Loss: 2.3621, Perplexity: 10.6133
Epoch [1/3], Step [973/1618], Loss: 2.3262, Perplexity: 10.2386
Epoch [1/3], Step [974/1618], Loss: 2.3377, Perplexity: 10.3577
Epoch [1/3], Step [975/1618], Loss: 2.3573, Perplexity: 10.5619
Epoch [1/3], Step [976/1618], Loss: 2.2541, Perplexity: 9.5266
Epoch [1/3], Step [977/1618], Loss: 2.3330, Perplexity: 10.3093
Epoch [1/3], Step [978/1618], Loss: 2.7643, Perplexity: 15.8675
Epoch [1/3], Step [979/1618], Loss: 2.2214, Perplexity: 9.2201
Epoch [1/3], Step [980/1618], Loss: 2.3261, Perplexity: 10.2382
Epoch [1/3], Step [981/1618], Loss: 2.3544, Perplexity: 10.5320
Epoch [1/3], Step [982/1618], Loss: 2.3567, Perplexity: 10.5562
Epoch [1/3], Step [983/1618], Loss: 2.4525, Perplexity: 11.6176
Epoch [1/3], Step [984/1618], Loss: 2.3816, Perplexity: 10.8227
Epoch [1/3], Step [985/1618], Loss: 2.3169, Perplexity: 10.1443
Epoch [1/3], Step [986/1618], Loss: 2.3595, Perplexity: 10.5859
Epoch [1/3], Step [987/1618], Loss: 2.3677, Perplexity: 10.6724
Epoch [1/3], Step [988/1618], Loss: 2.4883, Perplexity: 12.0414
Epoch [1/3], Step [989/1618], Loss: 2.2979, Perplexity: 9.9534
Epoch [1/3], Step [990/1618], Loss: 2.3571, Perplexity: 10.5608
Epoch [1/3], Step [991/1618], Loss: 2.7672, Perplexity: 15.9138
Epoch [1/3], Step [992/1618], Loss: 2.5452, Perplexity: 12.7459
Epoch [1/3], Step [993/1618], Loss: 2.4890, Perplexity: 12.0487
Epoch [1/3], Step [994/1618], Loss: 2.3056, Perplexity: 10.0303
Epoch [1/3], Step [995/1618], Loss: 2.2787, Perplexity: 9.7641
Epoch [1/3], Step [996/1618], Loss: 2.4102, Perplexity: 11.1357
Epoch [1/3], Step [997/1618], Loss: 2.4469, Perplexity: 11.5523
Epoch [1/3], Step [998/1618], Loss: 2.2921, Perplexity: 9.8958
Epoch [1/3], Step [999/1618], Loss: 2.3917, Perplexity: 10.9320
Epoch [1/3], Step [1000/1618], Loss: 2.3303, Perplexity: 10.2811
Epoch [1/3], Step [1001/1618], Loss: 2.3798, Perplexity: 10.8024
Epoch [1/3], Step [1002/1618], Loss: 2.4300, Perplexity: 11.3593
Epoch [1/3], Step [1003/1618], Loss: 2.3652, Perplexity: 10.6460
Epoch [1/3], Step [1004/1618], Loss: 2.2305, Perplexity: 9.3045
Epoch [1/3], Step [1005/1618], Loss: 2.5395, Perplexity: 12.6733
Epoch [1/3], Step [1006/1618], Loss: 2.4964, Perplexity: 12.1383
Epoch [1/3], Step [1007/1618], Loss: 2.3186, Perplexity: 10.1609
Epoch [1/3], Step [1008/1618], Loss: 2.5992, Perplexity: 13.4528
Epoch [1/3], Step [1009/1618], Loss: 2.4237, Perplexity: 11.2874
Epoch [1/3], Step [1010/1618], Loss: 2.4988, Perplexity: 12.1674
Epoch [1/3], Step [1011/1618], Loss: 2.3488, Perplexity: 10.4730
Epoch [1/3], Step [1012/1618], Loss: 2.4012, Perplexity: 11.0367
Epoch [1/3], Step [1013/1618], Loss: 2.3145, Perplexity: 10.1202
Epoch [1/3], Step [1014/1618], Loss: 2.6112, Perplexity: 13.6159
Epoch [1/3], Step [1015/1618], Loss: 2.2720, Perplexity: 9.6985
Epoch [1/3], Step [1016/1618], Loss: 2.2637, Perplexity: 9.6187
Epoch [1/3], Step [1017/1618], Loss: 2.3724, Perplexity: 10.7236
Epoch [1/3], Step [1018/1618], Loss: 2.3222, Perplexity: 10.1979
Epoch [1/3], Step [1019/1618], Loss: 2.2642, Perplexity: 9.6232
Epoch [1/3], Step [1020/1618], Loss: 2.9791, Perplexity: 19.6709
Epoch [1/3], Step [1021/1618], Loss: 3.2380, Perplexity: 25.4817
Epoch [1/3], Step [1022/1618], Loss: 2.2519, Perplexity: 9.5058
Epoch [1/3], Step [1023/1618], Loss: 2.3230, Perplexity: 10.2066
Epoch [1/3], Step [1024/1618], Loss: 2.3182, Perplexity: 10.1579
Epoch [1/3], Step [1025/1618], Loss: 2.4482, Perplexity: 11.5670
Epoch [1/3], Step [1026/1618], Loss: 2.3962, Perplexity: 10.9808
Epoch [1/3], Step [1027/1618], Loss: 2.3714, Perplexity: 10.7119
Epoch [1/3], Step [1028/1618], Loss: 2.6324, Perplexity: 13.9076
Epoch [1/3], Step [1029/1618], Loss: 2.1975, Perplexity: 9.0025
Epoch [1/3], Step [1030/1618], Loss: 2.2995, Perplexity: 9.9690
Epoch [1/3], Step [1031/1618], Loss: 3.0310, Perplexity: 20.7180
Epoch [1/3], Step [1032/1618], Loss: 2.5319, Perplexity: 12.5780
Epoch [1/3], Step [1033/1618], Loss: 2.3009, Perplexity: 9.9831
Epoch [1/3], Step [1034/1618], Loss: 2.3179, Perplexity: 10.1547
Epoch [1/3], Step [1035/1618], Loss: 2.3627, Perplexity: 10.6196
Epoch [1/3], Step [1036/1618], Loss: 2.4108, Perplexity: 11.1424
Epoch [1/3], Step [1037/1618], Loss: 2.2187, Perplexity: 9.1958
Epoch [1/3], Step [1038/1618], Loss: 2.1926, Perplexity: 8.9582
Epoch [1/3], Step [1039/1618], Loss: 2.5336, Perplexity: 12.5984
Epoch [1/3], Step [1040/1618], Loss: 2.5110, Perplexity: 12.3177
Epoch [1/3], Step [1041/1618], Loss: 2.3622, Perplexity: 10.6145
Epoch [1/3], Step [1042/1618], Loss: 2.5507, Perplexity: 12.8156
Epoch [1/3], Step [1043/1618], Loss: 2.3230, Perplexity: 10.2058
Epoch [1/3], Step [1044/1618], Loss: 2.2454, Perplexity: 9.4442
Epoch [1/3], Step [1045/1618], Loss: 2.3408, Perplexity: 10.3895
Epoch [1/3], Step [1046/1618], Loss: 2.5878, Perplexity: 13.3007
Epoch [1/3], Step [1047/1618], Loss: 2.5651, Perplexity: 13.0015
Epoch [1/3], Step [1048/1618], Loss: 2.9157, Perplexity: 18.4621
Epoch [1/3], Step [1049/1618], Loss: 2.3137, Perplexity: 10.1123
Epoch [1/3], Step [1050/1618], Loss: 2.4221, Perplexity: 11.2690
Epoch [1/3], Step [1051/1618], Loss: 2.3549, Perplexity: 10.5372
Epoch [1/3], Step [1052/1618], Loss: 2.2391, Perplexity: 9.3849
Epoch [1/3], Step [1053/1618], Loss: 2.4807, Perplexity: 11.9498
Epoch [1/3], Step [1054/1618], Loss: 2.5646, Perplexity: 12.9952
Epoch [1/3], Step [1055/1618], Loss: 2.1899, Perplexity: 8.9343
Epoch [1/3], Step [1056/1618], Loss: 2.2868, Perplexity: 9.8435
Epoch [1/3], Step [1057/1618], Loss: 2.4588, Perplexity: 11.6913
Epoch [1/3], Step [1058/1618], Loss: 2.6121, Perplexity: 13.6278
Epoch [1/3], Step [1059/1618], Loss: 2.1049, Perplexity: 8.2063
Epoch [1/3], Step [1060/1618], Loss: 2.4391, Perplexity: 11.4629
Epoch [1/3], Step [1061/1618], Loss: 2.3482, Perplexity: 10.4671
Epoch [1/3], Step [1062/1618], Loss: 2.3764, Perplexity: 10.7665
Epoch [1/3], Step [1063/1618], Loss: 2.2934, Perplexity: 9.9084
Epoch [1/3], Step [1064/1618], Loss: 2.2298, Perplexity: 9.2983
Epoch [1/3], Step [1065/1618], Loss: 2.2422, Perplexity: 9.4141
Epoch [1/3], Step [1066/1618], Loss: 2.4770, Perplexity: 11.9057
Epoch [1/3], Step [1067/1618], Loss: 2.3377, Perplexity: 10.3576
Epoch [1/3], Step [1068/1618], Loss: 2.2939, Perplexity: 9.9134
Epoch [1/3], Step [1069/1618], Loss: 2.4327, Perplexity: 11.3900
Epoch [1/3], Step [1070/1618], Loss: 2.4203, Perplexity: 11.2489
Epoch [1/3], Step [1071/1618], Loss: 2.2824, Perplexity: 9.7999
Epoch [1/3], Step [1072/1618], Loss: 2.2974, Perplexity: 9.9481
Epoch [1/3], Step [1073/1618], Loss: 2.2020, Perplexity: 9.0434
Epoch [1/3], Step [1074/1618], Loss: 2.7259, Perplexity: 15.2697
Epoch [1/3], Step [1075/1618], Loss: 2.3008, Perplexity: 9.9824
Epoch [1/3], Step [1076/1618], Loss: 2.2592, Perplexity: 9.5759
Epoch [1/3], Step [1077/1618], Loss: 2.2503, Perplexity: 9.4909
Epoch [1/3], Step [1078/1618], Loss: 2.3373, Perplexity: 10.3534
Epoch [1/3], Step [1079/1618], Loss: 2.3803, Perplexity: 10.8076
Epoch [1/3], Step [1080/1618], Loss: 2.2632, Perplexity: 9.6138
Epoch [1/3], Step [1081/1618], Loss: 2.4010, Perplexity: 11.0346
Epoch [1/3], Step [1082/1618], Loss: 2.2387, Perplexity: 9.3809
Epoch [1/3], Step [1083/1618], Loss: 2.3425, Perplexity: 10.4071
Epoch [1/3], Step [1084/1618], Loss: 2.3536, Perplexity: 10.5237
Epoch [1/3], Step [1085/1618], Loss: 2.3337, Perplexity: 10.3162
Epoch [1/3], Step [1086/1618], Loss: 2.3405, Perplexity: 10.3866
Epoch [1/3], Step [1087/1618], Loss: 2.3890, Perplexity: 10.9025
Epoch [1/3], Step [1088/1618], Loss: 2.3766, Perplexity: 10.7681
Epoch [1/3], Step [1089/1618], Loss: 2.2991, Perplexity: 9.9648
Epoch [1/3], Step [1090/1618], Loss: 2.2316, Perplexity: 9.3151
Epoch [1/3], Step [1091/1618], Loss: 2.2265, Perplexity: 9.2671
Epoch [1/3], Step [1092/1618], Loss: 2.8924, Perplexity: 18.0357
Epoch [1/3], Step [1093/1618], Loss: 2.3107, Perplexity: 10.0811
Epoch [1/3], Step [1094/1618], Loss: 2.3119, Perplexity: 10.0935
Epoch [1/3], Step [1095/1618], Loss: 2.2354, Perplexity: 9.3498
Epoch [1/3], Step [1096/1618], Loss: 2.2796, Perplexity: 9.7727
Epoch [1/3], Step [1097/1618], Loss: 2.4349, Perplexity: 11.4147
Epoch [1/3], Step [1098/1618], Loss: 2.2658, Perplexity: 9.6384
Epoch [1/3], Step [1099/1618], Loss: 2.2311, Perplexity: 9.3104
Epoch [1/3], Step [1100/1618], Loss: 2.2818, Perplexity: 9.7945
Epoch [1/3], Step [1101/1618], Loss: 2.2434, Perplexity: 9.4255
Epoch [1/3], Step [1102/1618], Loss: 2.3493, Perplexity: 10.4777
Epoch [1/3], Step [1103/1618], Loss: 2.3035, Perplexity: 10.0090
Epoch [1/3], Step [1104/1618], Loss: 2.2171, Perplexity: 9.1805
Epoch [1/3], Step [1105/1618], Loss: 2.2035, Perplexity: 9.0566
Epoch [1/3], Step [1106/1618], Loss: 2.1789, Perplexity: 8.8362
Epoch [1/3], Step [1107/1618], Loss: 2.2696, Perplexity: 9.6759
Epoch [1/3], Step [1108/1618], Loss: 2.1760, Perplexity: 8.8107
Epoch [1/3], Step [1109/1618], Loss: 2.7690, Perplexity: 15.9423
Epoch [1/3], Step [1110/1618], Loss: 2.4066, Perplexity: 11.0966
Epoch [1/3], Step [1111/1618], Loss: 2.5219, Perplexity: 12.4522
Epoch [1/3], Step [1112/1618], Loss: 2.7178, Perplexity: 15.1465
Epoch [1/3], Step [1113/1618], Loss: 2.3616, Perplexity: 10.6076
Epoch [1/3], Step [1114/1618], Loss: 2.3154, Perplexity: 10.1292
Epoch [1/3], Step [1115/1618], Loss: 2.3920, Perplexity: 10.9353
Epoch [1/3], Step [1116/1618], Loss: 2.3169, Perplexity: 10.1445
Epoch [1/3], Step [1117/1618], Loss: 2.2705, Perplexity: 9.6842
Epoch [1/3], Step [1118/1618], Loss: 2.4162, Perplexity: 11.2031
Epoch [1/3], Step [1119/1618], Loss: 2.3556, Perplexity: 10.5444
Epoch [1/3], Step [1120/1618], Loss: 2.3035, Perplexity: 10.0095
Epoch [1/3], Step [1121/1618], Loss: 2.1810, Perplexity: 8.8554
Epoch [1/3], Step [1122/1618], Loss: 2.3865, Perplexity: 10.8756
Epoch [1/3], Step [1123/1618], Loss: 2.2513, Perplexity: 9.5001
Epoch [1/3], Step [1124/1618], Loss: 2.2657, Perplexity: 9.6375
Epoch [1/3], Step [1125/1618], Loss: 2.2963, Perplexity: 9.9374
Epoch [1/3], Step [1126/1618], Loss: 2.2457, Perplexity: 9.4474
Epoch [1/3], Step [1127/1618], Loss: 2.4559, Perplexity: 11.6568
Epoch [1/3], Step [1128/1618], Loss: 2.2774, Perplexity: 9.7514
Epoch [1/3], Step [1129/1618], Loss: 2.2759, Perplexity: 9.7365
Epoch [1/3], Step [1130/1618], Loss: 2.9650, Perplexity: 19.3941
Epoch [1/3], Step [1131/1618], Loss: 2.2484, Perplexity: 9.4725
Epoch [1/3], Step [1132/1618], Loss: 2.2972, Perplexity: 9.9467
Epoch [1/3], Step [1133/1618], Loss: 2.7857, Perplexity: 16.2115
Epoch [1/3], Step [1134/1618], Loss: 2.5241, Perplexity: 12.4800
Epoch [1/3], Step [1135/1618], Loss: 2.4778, Perplexity: 11.9153
Epoch [1/3], Step [1136/1618], Loss: 2.2670, Perplexity: 9.6503
Epoch [1/3], Step [1137/1618], Loss: 2.3793, Perplexity: 10.7973
Epoch [1/3], Step [1138/1618], Loss: 2.5930, Perplexity: 13.3704
Epoch [1/3], Step [1139/1618], Loss: 2.4104, Perplexity: 11.1389
Epoch [1/3], Step [1140/1618], Loss: 2.2178, Perplexity: 9.1870
Epoch [1/3], Step [1141/1618], Loss: 2.2416, Perplexity: 9.4086
Epoch [1/3], Step [1142/1618], Loss: 2.2401, Perplexity: 9.3940
Epoch [1/3], Step [1143/1618], Loss: 2.2592, Perplexity: 9.5756
Epoch [1/3], Step [1144/1618], Loss: 2.2615, Perplexity: 9.5977
Epoch [1/3], Step [1145/1618], Loss: 2.2264, Perplexity: 9.2667
Epoch [1/3], Step [1146/1618], Loss: 2.3571, Perplexity: 10.5607
Epoch [1/3], Step [1147/1618], Loss: 2.1559, Perplexity: 8.6355
Epoch [1/3], Step [1148/1618], Loss: 2.1498, Perplexity: 8.5831
Epoch [1/3], Step [1149/1618], Loss: 2.2204, Perplexity: 9.2114
Epoch [1/3], Step [1150/1618], Loss: 2.1031, Perplexity: 8.1916
Epoch [1/3], Step [1151/1618], Loss: 2.5149, Perplexity: 12.3652
Epoch [1/3], Step [1152/1618], Loss: 2.5927, Perplexity: 13.3664
Epoch [1/3], Step [1153/1618], Loss: 2.2239, Perplexity: 9.2435
Epoch [1/3], Step [1154/1618], Loss: 2.2435, Perplexity: 9.4263
Epoch [1/3], Step [1155/1618], Loss: 2.1016, Perplexity: 8.1790
Epoch [1/3], Step [1156/1618], Loss: 2.2844, Perplexity: 9.8200
Epoch [1/3], Step [1157/1618], Loss: 2.3152, Perplexity: 10.1268
Epoch [1/3], Step [1158/1618], Loss: 2.2417, Perplexity: 9.4091
Epoch [1/3], Step [1159/1618], Loss: 2.3691, Perplexity: 10.6874
Epoch [1/3], Step [1160/1618], Loss: 2.3078, Perplexity: 10.0519
Epoch [1/3], Step [1161/1618], Loss: 2.2037, Perplexity: 9.0585
Epoch [1/3], Step [1162/1618], Loss: 2.2661, Perplexity: 9.6415
Epoch [1/3], Step [1163/1618], Loss: 2.3181, Perplexity: 10.1563
Epoch [1/3], Step [1164/1618], Loss: 2.3376, Perplexity: 10.3565
Epoch [1/3], Step [1165/1618], Loss: 2.2761, Perplexity: 9.7386
Epoch [1/3], Step [1166/1618], Loss: 2.3593, Perplexity: 10.5832
Epoch [1/3], Step [1167/1618], Loss: 2.1691, Perplexity: 8.7503
Epoch [1/3], Step [1168/1618], Loss: 2.2827, Perplexity: 9.8035
Epoch [1/3], Step [1169/1618], Loss: 3.0744, Perplexity: 21.6359
Epoch [1/3], Step [1170/1618], Loss: 2.2523, Perplexity: 9.5093
Epoch [1/3], Step [1171/1618], Loss: 2.5429, Perplexity: 12.7169
Epoch [1/3], Step [1172/1618], Loss: 2.3634, Perplexity: 10.6266
Epoch [1/3], Step [1173/1618], Loss: 2.5376, Perplexity: 12.6496
Epoch [1/3], Step [1174/1618], Loss: 2.3309, Perplexity: 10.2871
Epoch [1/3], Step [1175/1618], Loss: 2.2467, Perplexity: 9.4565
Epoch [1/3], Step [1176/1618], Loss: 2.2362, Perplexity: 9.3575
Epoch [1/3], Step [1177/1618], Loss: 2.4125, Perplexity: 11.1621
Epoch [1/3], Step [1178/1618], Loss: 2.2792, Perplexity: 9.7687
Epoch [1/3], Step [1179/1618], Loss: 2.3289, Perplexity: 10.2666
Epoch [1/3], Step [1180/1618], Loss: 2.2924, Perplexity: 9.8984
Epoch [1/3], Step [1181/1618], Loss: 2.2773, Perplexity: 9.7501
Epoch [1/3], Step [1182/1618], Loss: 2.2891, Perplexity: 9.8661
Epoch [1/3], Step [1183/1618], Loss: 2.2192, Perplexity: 9.1996
Epoch [1/3], Step [1184/1618], Loss: 2.4054, Perplexity: 11.0834
Epoch [1/3], Step [1185/1618], Loss: 2.2270, Perplexity: 9.2722
Epoch [1/3], Step [1186/1618], Loss: 2.2617, Perplexity: 9.5999
Epoch [1/3], Step [1187/1618], Loss: 2.3061, Perplexity: 10.0355
Epoch [1/3], Step [1188/1618], Loss: 2.2294, Perplexity: 9.2944
Epoch [1/3], Step [1189/1618], Loss: 2.1058, Perplexity: 8.2136
Epoch [1/3], Step [1190/1618], Loss: 2.2869, Perplexity: 9.8445
Epoch [1/3], Step [1191/1618], Loss: 2.1301, Perplexity: 8.4161
Epoch [1/3], Step [1192/1618], Loss: 2.3889, Perplexity: 10.9017
Epoch [1/3], Step [1193/1618], Loss: 2.5009, Perplexity: 12.1930
Epoch [1/3], Step [1194/1618], Loss: 2.2574, Perplexity: 9.5578
Epoch [1/3], Step [1195/1618], Loss: 2.3837, Perplexity: 10.8449
Epoch [1/3], Step [1196/1618], Loss: 2.2593, Perplexity: 9.5760
Epoch [1/3], Step [1197/1618], Loss: 2.1812, Perplexity: 8.8566
Epoch [1/3], Step [1198/1618], Loss: 2.2488, Perplexity: 9.4759
Epoch [1/3], Step [1199/1618], Loss: 2.2730, Perplexity: 9.7083
Epoch [1/3], Step [1200/1618], Loss: 2.3397, Perplexity: 10.3780
Epoch [1/3], Step [1201/1618], Loss: 2.1475, Perplexity: 8.5637
Epoch [1/3], Step [1202/1618], Loss: 2.2660, Perplexity: 9.6406
Epoch [1/3], Step [1203/1618], Loss: 2.3525, Perplexity: 10.5119
Epoch [1/3], Step [1204/1618], Loss: 2.2254, Perplexity: 9.2576
Epoch [1/3], Step [1205/1618], Loss: 2.3071, Perplexity: 10.0448
Epoch [1/3], Step [1206/1618], Loss: 2.1754, Perplexity: 8.8058
Epoch [1/3], Step [1207/1618], Loss: 2.1876, Perplexity: 8.9141
Epoch [1/3], Step [1208/1618], Loss: 2.2042, Perplexity: 9.0630
Epoch [1/3], Step [1209/1618], Loss: 2.3385, Perplexity: 10.3658
Epoch [1/3], Step [1210/1618], Loss: 2.2409, Perplexity: 9.4021
Epoch [1/3], Step [1211/1618], Loss: 2.2993, Perplexity: 9.9674
Epoch [1/3], Step [1212/1618], Loss: 2.2372, Perplexity: 9.3667
Epoch [1/3], Step [1213/1618], Loss: 2.2685, Perplexity: 9.6647
Epoch [1/3], Step [1214/1618], Loss: 2.2334, Perplexity: 9.3315
Epoch [1/3], Step [1215/1618], Loss: 2.3590, Perplexity: 10.5806
Epoch [1/3], Step [1216/1618], Loss: 2.2272, Perplexity: 9.2734
Epoch [1/3], Step [1217/1618], Loss: 2.2758, Perplexity: 9.7357
Epoch [1/3], Step [1218/1618], Loss: 2.3182, Perplexity: 10.1569
Epoch [1/3], Step [1219/1618], Loss: 2.0871, Perplexity: 8.0614
Epoch [1/3], Step [1220/1618], Loss: 2.3447, Perplexity: 10.4303
Epoch [1/3], Step [1221/1618], Loss: 2.2984, Perplexity: 9.9584
Epoch [1/3], Step [1222/1618], Loss: 2.3218, Perplexity: 10.1936
Epoch [1/3], Step [1223/1618], Loss: 2.2791, Perplexity: 9.7681
Epoch [1/3], Step [1224/1618], Loss: 2.2263, Perplexity: 9.2656
Epoch [1/3], Step [1225/1618], Loss: 2.3339, Perplexity: 10.3178
Epoch [1/3], Step [1226/1618], Loss: 3.0234, Perplexity: 20.5615
Epoch [1/3], Step [1227/1618], Loss: 2.2651, Perplexity: 9.6321
Epoch [1/3], Step [1228/1618], Loss: 2.4522, Perplexity: 11.6134
Epoch [1/3], Step [1229/1618], Loss: 2.2295, Perplexity: 9.2951
Epoch [1/3], Step [1230/1618], Loss: 2.3804, Perplexity: 10.8094
Epoch [1/3], Step [1231/1618], Loss: 2.3799, Perplexity: 10.8038
Epoch [1/3], Step [1232/1618], Loss: 2.4276, Perplexity: 11.3314
Epoch [1/3], Step [1233/1618], Loss: 2.5600, Perplexity: 12.9353
Epoch [1/3], Step [1234/1618], Loss: 2.6275, Perplexity: 13.8388
Epoch [1/3], Step [1235/1618], Loss: 2.2371, Perplexity: 9.3662
Epoch [1/3], Step [1236/1618], Loss: 2.2670, Perplexity: 9.6507
Epoch [1/3], Step [1237/1618], Loss: 2.4142, Perplexity: 11.1806
Epoch [1/3], Step [1238/1618], Loss: 2.1637, Perplexity: 8.7032
Epoch [1/3], Step [1239/1618], Loss: 2.1910, Perplexity: 8.9440
Epoch [1/3], Step [1240/1618], Loss: 2.3754, Perplexity: 10.7557
Epoch [1/3], Step [1241/1618], Loss: 2.1275, Perplexity: 8.3937
Epoch [1/3], Step [1242/1618], Loss: 2.1679, Perplexity: 8.7400
Epoch [1/3], Step [1243/1618], Loss: 2.2657, Perplexity: 9.6381
Epoch [1/3], Step [1244/1618], Loss: 2.2610, Perplexity: 9.5931
Epoch [1/3], Step [1245/1618], Loss: 2.1978, Perplexity: 9.0053
Epoch [1/3], Step [1246/1618], Loss: 2.2083, Perplexity: 9.1004
Epoch [1/3], Step [1247/1618], Loss: 2.1748, Perplexity: 8.8003
Epoch [1/3], Step [1248/1618], Loss: 2.2475, Perplexity: 9.4637
Epoch [1/3], Step [1249/1618], Loss: 2.1371, Perplexity: 8.4752
Epoch [1/3], Step [1250/1618], Loss: 2.4138, Perplexity: 11.1760
Epoch [1/3], Step [1251/1618], Loss: 2.1546, Perplexity: 8.6245
Epoch [1/3], Step [1252/1618], Loss: 2.1993, Perplexity: 9.0186
Epoch [1/3], Step [1253/1618], Loss: 2.2897, Perplexity: 9.8724
Epoch [1/3], Step [1254/1618], Loss: 2.3203, Perplexity: 10.1784
Epoch [1/3], Step [1255/1618], Loss: 2.2394, Perplexity: 9.3877
Epoch [1/3], Step [1256/1618], Loss: 2.3143, Perplexity: 10.1175
Epoch [1/3], Step [1257/1618], Loss: 2.3321, Perplexity: 10.2999
Epoch [1/3], Step [1258/1618], Loss: 2.2012, Perplexity: 9.0358
Epoch [1/3], Step [1259/1618], Loss: 2.3607, Perplexity: 10.5984
Epoch [1/3], Step [1260/1618], Loss: 2.5797, Perplexity: 13.1931
Epoch [1/3], Step [1261/1618], Loss: 2.2791, Perplexity: 9.7675
Epoch [1/3], Step [1262/1618], Loss: 2.3019, Perplexity: 9.9932
Epoch [1/3], Step [1263/1618], Loss: 2.5960, Perplexity: 13.4104
Epoch [1/3], Step [1264/1618], Loss: 2.1310, Perplexity: 8.4232
Epoch [1/3], Step [1265/1618], Loss: 2.2866, Perplexity: 9.8419
Epoch [1/3], Step [1266/1618], Loss: 2.1450, Perplexity: 8.5422
Epoch [1/3], Step [1267/1618], Loss: 2.4537, Perplexity: 11.6311
Epoch [1/3], Step [1268/1618], Loss: 2.1561, Perplexity: 8.6377
Epoch [1/3], Step [1269/1618], Loss: 2.2243, Perplexity: 9.2468
Epoch [1/3], Step [1270/1618], Loss: 2.0970, Perplexity: 8.1415
Epoch [1/3], Step [1271/1618], Loss: 2.2045, Perplexity: 9.0660
Epoch [1/3], Step [1272/1618], Loss: 2.2042, Perplexity: 9.0631
Epoch [1/3], Step [1273/1618], Loss: 2.2937, Perplexity: 9.9114
Epoch [1/3], Step [1274/1618], Loss: 2.1947, Perplexity: 8.9770
Epoch [1/3], Step [1275/1618], Loss: 2.1266, Perplexity: 8.3862
Epoch [1/3], Step [1276/1618], Loss: 4.1089, Perplexity: 60.8814
Epoch [1/3], Step [1277/1618], Loss: 2.2542, Perplexity: 9.5277
Epoch [1/3], Step [1278/1618], Loss: 2.2800, Perplexity: 9.7771
Epoch [1/3], Step [1279/1618], Loss: 2.3804, Perplexity: 10.8088
Epoch [1/3], Step [1280/1618], Loss: 2.2624, Perplexity: 9.6057
Epoch [1/3], Step [1281/1618], Loss: 2.7827, Perplexity: 16.1634
Epoch [1/3], Step [1282/1618], Loss: 2.2644, Perplexity: 9.6250
Epoch [1/3], Step [1283/1618], Loss: 2.2813, Perplexity: 9.7896
Epoch [1/3], Step [1284/1618], Loss: 2.5950, Perplexity: 13.3969
Epoch [1/3], Step [1285/1618], Loss: 2.1877, Perplexity: 8.9146
Epoch [1/3], Step [1286/1618], Loss: 2.5605, Perplexity: 12.9417
Epoch [1/3], Step [1287/1618], Loss: 2.2283, Perplexity: 9.2836
Epoch [1/3], Step [1288/1618], Loss: 2.2722, Perplexity: 9.7009
Epoch [1/3], Step [1289/1618], Loss: 2.3393, Perplexity: 10.3737
Epoch [1/3], Step [1290/1618], Loss: 2.3070, Perplexity: 10.0439
Epoch [1/3], Step [1291/1618], Loss: 2.2476, Perplexity: 9.4646
Epoch [1/3], Step [1292/1618], Loss: 2.2646, Perplexity: 9.6271
Epoch [1/3], Step [1293/1618], Loss: 2.3691, Perplexity: 10.6874
Epoch [1/3], Step [1294/1618], Loss: 2.4083, Perplexity: 11.1146
Epoch [1/3], Step [1295/1618], Loss: 2.2808, Perplexity: 9.7845
Epoch [1/3], Step [1296/1618], Loss: 2.1767, Perplexity: 8.8173
Epoch [1/3], Step [1297/1618], Loss: 2.4890, Perplexity: 12.0496
Epoch [1/3], Step [1298/1618], Loss: 2.2521, Perplexity: 9.5073
Epoch [1/3], Step [1299/1618], Loss: 2.2292, Perplexity: 9.2924
Epoch [1/3], Step [1300/1618], Loss: 2.2814, Perplexity: 9.7905
Epoch [1/3], Step [1301/1618], Loss: 2.1716, Perplexity: 8.7720
Epoch [1/3], Step [1302/1618], Loss: 2.5069, Perplexity: 12.2670
Epoch [1/3], Step [1303/1618], Loss: 2.3261, Perplexity: 10.2380
Epoch [1/3], Step [1304/1618], Loss: 2.5516, Perplexity: 12.8282
Epoch [1/3], Step [1305/1618], Loss: 2.2015, Perplexity: 9.0385
Epoch [1/3], Step [1306/1618], Loss: 2.1296, Perplexity: 8.4116
Epoch [1/3], Step [1307/1618], Loss: 2.2982, Perplexity: 9.9566
Epoch [1/3], Step [1308/1618], Loss: 2.1812, Perplexity: 8.8569
Epoch [1/3], Step [1309/1618], Loss: 2.3681, Perplexity: 10.6768
Epoch [1/3], Step [1310/1618], Loss: 2.6720, Perplexity: 14.4692
Epoch [1/3], Step [1311/1618], Loss: 2.3618, Perplexity: 10.6103
Epoch [1/3], Step [1312/1618], Loss: 2.2113, Perplexity: 9.1275
Epoch [1/3], Step [1313/1618], Loss: 2.2816, Perplexity: 9.7920
Epoch [1/3], Step [1314/1618], Loss: 3.3683, Perplexity: 29.0282
Epoch [1/3], Step [1315/1618], Loss: 2.2662, Perplexity: 9.6428
Epoch [1/3], Step [1316/1618], Loss: 2.2594, Perplexity: 9.5776
Epoch [1/3], Step [1317/1618], Loss: 2.5678, Perplexity: 13.0377
Epoch [1/3], Step [1318/1618], Loss: 2.3256, Perplexity: 10.2324
Epoch [1/3], Step [1319/1618], Loss: 2.1768, Perplexity: 8.8182
Epoch [1/3], Step [1320/1618], Loss: 2.3630, Perplexity: 10.6230
Epoch [1/3], Step [1321/1618], Loss: 2.1120, Perplexity: 8.2650
Epoch [1/3], Step [1322/1618], Loss: 2.1403, Perplexity: 8.5023
Epoch [1/3], Step [1323/1618], Loss: 2.3690, Perplexity: 10.6862
Epoch [1/3], Step [1324/1618], Loss: 2.1952, Perplexity: 8.9816
Epoch [1/3], Step [1325/1618], Loss: 2.2328, Perplexity: 9.3258
Epoch [1/3], Step [1326/1618], Loss: 2.2587, Perplexity: 9.5710
Epoch [1/3], Step [1327/1618], Loss: 2.2162, Perplexity: 9.1725
Epoch [1/3], Step [1328/1618], Loss: 2.2931, Perplexity: 9.9052
Epoch [1/3], Step [1329/1618], Loss: 2.2537, Perplexity: 9.5228
Epoch [1/3], Step [1330/1618], Loss: 2.2957, Perplexity: 9.9313
Epoch [1/3], Step [1331/1618], Loss: 2.1737, Perplexity: 8.7911
Epoch [1/3], Step [1332/1618], Loss: 2.1218, Perplexity: 8.3459
Epoch [1/3], Step [1333/1618], Loss: 2.1390, Perplexity: 8.4913
Epoch [1/3], Step [1334/1618], Loss: 2.1105, Perplexity: 8.2526
Epoch [1/3], Step [1335/1618], Loss: 2.2351, Perplexity: 9.3472
Epoch [1/3], Step [1336/1618], Loss: 2.2229, Perplexity: 9.2338
Epoch [1/3], Step [1337/1618], Loss: 2.1614, Perplexity: 8.6832
Epoch [1/3], Step [1338/1618], Loss: 2.1676, Perplexity: 8.7374
Epoch [1/3], Step [1339/1618], Loss: 2.1552, Perplexity: 8.6293
Epoch [1/3], Step [1340/1618], Loss: 3.2197, Perplexity: 25.0201
Epoch [1/3], Step [1341/1618], Loss: 2.1963, Perplexity: 8.9913
Epoch [1/3], Step [1342/1618], Loss: 2.1995, Perplexity: 9.0202
Epoch [1/3], Step [1343/1618], Loss: 2.3447, Perplexity: 10.4306
Epoch [1/3], Step [1344/1618], Loss: 2.2565, Perplexity: 9.5494
Epoch [1/3], Step [1345/1618], Loss: 2.2531, Perplexity: 9.5173
Epoch [1/3], Step [1346/1618], Loss: 2.1778, Perplexity: 8.8273
Epoch [1/3], Step [1347/1618], Loss: 2.7055, Perplexity: 14.9624
Epoch [1/3], Step [1348/1618], Loss: 2.1342, Perplexity: 8.4506
Epoch [1/3], Step [1349/1618], Loss: 2.3418, Perplexity: 10.4000
Epoch [1/3], Step [1350/1618], Loss: 2.5636, Perplexity: 12.9820
Epoch [1/3], Step [1351/1618], Loss: 2.3440, Perplexity: 10.4225
Epoch [1/3], Step [1352/1618], Loss: 2.1554, Perplexity: 8.6312
Epoch [1/3], Step [1353/1618], Loss: 2.1451, Perplexity: 8.5430
Epoch [1/3], Step [1354/1618], Loss: 2.5365, Perplexity: 12.6355
Epoch [1/3], Step [1355/1618], Loss: 2.0842, Perplexity: 8.0380
Epoch [1/3], Step [1356/1618], Loss: 2.1910, Perplexity: 8.9442
Epoch [1/3], Step [1357/1618], Loss: 2.2931, Perplexity: 9.9059
Epoch [1/3], Step [1358/1618], Loss: 2.7341, Perplexity: 15.3959
Epoch [1/3], Step [1359/1618], Loss: 2.2400, Perplexity: 9.3937
Epoch [1/3], Step [1360/1618], Loss: 2.3400, Perplexity: 10.3815
Epoch [1/3], Step [1361/1618], Loss: 2.1585, Perplexity: 8.6583
Epoch [1/3], Step [1362/1618], Loss: 2.2231, Perplexity: 9.2356
Epoch [1/3], Step [1363/1618], Loss: 2.2566, Perplexity: 9.5509
Epoch [1/3], Step [1364/1618], Loss: 2.3805, Perplexity: 10.8102
Epoch [1/3], Step [1365/1618], Loss: 2.1976, Perplexity: 9.0031
Epoch [1/3], Step [1366/1618], Loss: 2.5852, Perplexity: 13.2654
Epoch [1/3], Step [1367/1618], Loss: 2.2450, Perplexity: 9.4402
Epoch [1/3], Step [1368/1618], Loss: 2.1360, Perplexity: 8.4654
Epoch [1/3], Step [1369/1618], Loss: 2.8462, Perplexity: 17.2230
Epoch [1/3], Step [1370/1618], Loss: 2.1900, Perplexity: 8.9353
Epoch [1/3], Step [1371/1618], Loss: 2.6629, Perplexity: 14.3377
Epoch [1/3], Step [1372/1618], Loss: 2.3808, Perplexity: 10.8136
Epoch [1/3], Step [1373/1618], Loss: 2.2474, Perplexity: 9.4628
Epoch [1/3], Step [1374/1618], Loss: 2.3291, Perplexity: 10.2688
Epoch [1/3], Step [1375/1618], Loss: 2.3274, Perplexity: 10.2517
Epoch [1/3], Step [1376/1618], Loss: 2.1688, Perplexity: 8.7474
Epoch [1/3], Step [1377/1618], Loss: 2.2220, Perplexity: 9.2254
Epoch [1/3], Step [1378/1618], Loss: 2.2047, Perplexity: 9.0673
Epoch [1/3], Step [1379/1618], Loss: 2.2287, Perplexity: 9.2882
Epoch [1/3], Step [1380/1618], Loss: 2.5470, Perplexity: 12.7683
Epoch [1/3], Step [1381/1618], Loss: 2.2267, Perplexity: 9.2689
Epoch [1/3], Step [1382/1618], Loss: 2.1977, Perplexity: 9.0040
Epoch [1/3], Step [1383/1618], Loss: 2.2631, Perplexity: 9.6126
Epoch [1/3], Step [1384/1618], Loss: 2.1662, Perplexity: 8.7249
Epoch [1/3], Step [1385/1618], Loss: 2.2219, Perplexity: 9.2249
Epoch [1/3], Step [1386/1618], Loss: 3.3819, Perplexity: 29.4270
Epoch [1/3], Step [1387/1618], Loss: 2.1316, Perplexity: 8.4287
Epoch [1/3], Step [1388/1618], Loss: 2.1404, Perplexity: 8.5029
Epoch [1/3], Step [1389/1618], Loss: 2.1861, Perplexity: 8.9004
Epoch [1/3], Step [1390/1618], Loss: 2.1822, Perplexity: 8.8659
Epoch [1/3], Step [1391/1618], Loss: 2.1768, Perplexity: 8.8178
Epoch [1/3], Step [1392/1618], Loss: 2.2159, Perplexity: 9.1698
Epoch [1/3], Step [1393/1618], Loss: 2.2616, Perplexity: 9.5981
Epoch [1/3], Step [1394/1618], Loss: 2.2649, Perplexity: 9.6299
Epoch [1/3], Step [1395/1618], Loss: 2.1981, Perplexity: 9.0075
Epoch [1/3], Step [1396/1618], Loss: 2.2931, Perplexity: 9.9056
Epoch [1/3], Step [1397/1618], Loss: 2.4657, Perplexity: 11.7719
Epoch [1/3], Step [1398/1618], Loss: 2.1201, Perplexity: 8.3321
Epoch [1/3], Step [1399/1618], Loss: 2.2302, Perplexity: 9.3021
Epoch [1/3], Step [1400/1618], Loss: 2.1932, Perplexity: 8.9639
Epoch [1/3], Step [1401/1618], Loss: 2.1653, Perplexity: 8.7172
Epoch [1/3], Step [1402/1618], Loss: 2.5845, Perplexity: 13.2573
Epoch [1/3], Step [1403/1618], Loss: 2.1209, Perplexity: 8.3385
Epoch [1/3], Step [1404/1618], Loss: 2.1449, Perplexity: 8.5415
Epoch [1/3], Step [1405/1618], Loss: 2.3402, Perplexity: 10.3837
Epoch [1/3], Step [1406/1618], Loss: 2.9007, Perplexity: 18.1863
Epoch [1/3], Step [1407/1618], Loss: 3.2181, Perplexity: 24.9794
Epoch [1/3], Step [1408/1618], Loss: 2.2085, Perplexity: 9.1020
Epoch [1/3], Step [1409/1618], Loss: 2.9324, Perplexity: 18.7735
Epoch [1/3], Step [1410/1618], Loss: 2.1016, Perplexity: 8.1790
Epoch [1/3], Step [1411/1618], Loss: 2.3175, Perplexity: 10.1503
Epoch [1/3], Step [1412/1618], Loss: 2.1836, Perplexity: 8.8780
Epoch [1/3], Step [1413/1618], Loss: 2.1899, Perplexity: 8.9343
Epoch [1/3], Step [1414/1618], Loss: 2.1874, Perplexity: 8.9121
Epoch [1/3], Step [1415/1618], Loss: 2.3705, Perplexity: 10.7031
Epoch [1/3], Step [1416/1618], Loss: 2.2690, Perplexity: 9.6699
Epoch [1/3], Step [1417/1618], Loss: 2.3756, Perplexity: 10.7577
Epoch [1/3], Step [1418/1618], Loss: 2.2630, Perplexity: 9.6121
Epoch [1/3], Step [1419/1618], Loss: 2.2602, Perplexity: 9.5847
Epoch [1/3], Step [1420/1618], Loss: 2.3076, Perplexity: 10.0504
Epoch [1/3], Step [1421/1618], Loss: 2.3890, Perplexity: 10.9029
Epoch [1/3], Step [1422/1618], Loss: 2.0975, Perplexity: 8.1456
Epoch [1/3], Step [1423/1618], Loss: 2.6218, Perplexity: 13.7609
Epoch [1/3], Step [1424/1618], Loss: 2.4493, Perplexity: 11.5803
Epoch [1/3], Step [1425/1618], Loss: 2.1775, Perplexity: 8.8246
Epoch [1/3], Step [1426/1618], Loss: 2.3438, Perplexity: 10.4203
Epoch [1/3], Step [1427/1618], Loss: 2.5532, Perplexity: 12.8484
Epoch [1/3], Step [1428/1618], Loss: 2.1579, Perplexity: 8.6529
Epoch [1/3], Step [1429/1618], Loss: 2.2672, Perplexity: 9.6520
Epoch [1/3], Step [1430/1618], Loss: 2.2571, Perplexity: 9.5557
Epoch [1/3], Step [1431/1618], Loss: 2.3124, Perplexity: 10.0982
Epoch [1/3], Step [1432/1618], Loss: 2.2368, Perplexity: 9.3635
Epoch [1/3], Step [1433/1618], Loss: 2.3274, Perplexity: 10.2513
Epoch [1/3], Step [1434/1618], Loss: 2.1828, Perplexity: 8.8710
Epoch [1/3], Step [1435/1618], Loss: 2.8133, Perplexity: 16.6645
Epoch [1/3], Step [1436/1618], Loss: 2.4490, Perplexity: 11.5762
Epoch [1/3], Step [1437/1618], Loss: 2.1567, Perplexity: 8.6422
Epoch [1/3], Step [1438/1618], Loss: 2.1726, Perplexity: 8.7810
Epoch [1/3], Step [1439/1618], Loss: 2.3217, Perplexity: 10.1934
Epoch [1/3], Step [1440/1618], Loss: 2.1959, Perplexity: 8.9882
Epoch [1/3], Step [1441/1618], Loss: 2.1237, Perplexity: 8.3617
Epoch [1/3], Step [1442/1618], Loss: 2.2502, Perplexity: 9.4895
Epoch [1/3], Step [1443/1618], Loss: 2.6135, Perplexity: 13.6469
Epoch [1/3], Step [1444/1618], Loss: 2.1690, Perplexity: 8.7496
Epoch [1/3], Step [1445/1618], Loss: 2.1528, Perplexity: 8.6089
Epoch [1/3], Step [1446/1618], Loss: 2.5018, Perplexity: 12.2045
Epoch [1/3], Step [1447/1618], Loss: 2.1419, Perplexity: 8.5155
Epoch [1/3], Step [1448/1618], Loss: 2.3441, Perplexity: 10.4237
Epoch [1/3], Step [1449/1618], Loss: 2.6881, Perplexity: 14.7044
Epoch [1/3], Step [1450/1618], Loss: 2.0791, Perplexity: 7.9974
Epoch [1/3], Step [1451/1618], Loss: 2.2614, Perplexity: 9.5964
Epoch [1/3], Step [1452/1618], Loss: 2.1968, Perplexity: 8.9959
Epoch [1/3], Step [1453/1618], Loss: 2.1269, Perplexity: 8.3888
Epoch [1/3], Step [1454/1618], Loss: 2.2615, Perplexity: 9.5971
Epoch [1/3], Step [1455/1618], Loss: 2.0990, Perplexity: 8.1577
Epoch [1/3], Step [1456/1618], Loss: 2.6837, Perplexity: 14.6390
Epoch [1/3], Step [1457/1618], Loss: 2.1134, Perplexity: 8.2761
Epoch [1/3], Step [1458/1618], Loss: 2.4019, Perplexity: 11.0443
Epoch [1/3], Step [1459/1618], Loss: 2.2317, Perplexity: 9.3156
Epoch [1/3], Step [1460/1618], Loss: 2.1053, Perplexity: 8.2094
Epoch [1/3], Step [1461/1618], Loss: 2.3546, Perplexity: 10.5340
Epoch [1/3], Step [1462/1618], Loss: 2.1966, Perplexity: 8.9944
Epoch [1/3], Step [1463/1618], Loss: 2.2627, Perplexity: 9.6090
Epoch [1/3], Step [1464/1618], Loss: 2.3235, Perplexity: 10.2113
Epoch [1/3], Step [1465/1618], Loss: 2.1910, Perplexity: 8.9444
Epoch [1/3], Step [1466/1618], Loss: 2.5658, Perplexity: 13.0110
Epoch [1/3], Step [1467/1618], Loss: 2.7271, Perplexity: 15.2886
Epoch [1/3], Step [1468/1618], Loss: 2.2818, Perplexity: 9.7947
Epoch [1/3], Step [1469/1618], Loss: 2.2521, Perplexity: 9.5075
Epoch [1/3], Step [1470/1618], Loss: 2.2500, Perplexity: 9.4878
Epoch [1/3], Step [1471/1618], Loss: 2.2336, Perplexity: 9.3338
Epoch [1/3], Step [1472/1618], Loss: 2.1186, Perplexity: 8.3195
Epoch [1/3], Step [1473/1618], Loss: 2.2683, Perplexity: 9.6625
Epoch [1/3], Step [1474/1618], Loss: 2.2076, Perplexity: 9.0942
Epoch [1/3], Step [1475/1618], Loss: 2.2413, Perplexity: 9.4052
Epoch [1/3], Step [1476/1618], Loss: 3.1879, Perplexity: 24.2369
Epoch [1/3], Step [1477/1618], Loss: 2.1414, Perplexity: 8.5115
Epoch [1/3], Step [1478/1618], Loss: 2.2868, Perplexity: 9.8438
Epoch [1/3], Step [1479/1618], Loss: 2.2611, Perplexity: 9.5938
Epoch [1/3], Step [1480/1618], Loss: 2.4946, Perplexity: 12.1166
Epoch [1/3], Step [1481/1618], Loss: 2.2725, Perplexity: 9.7034
Epoch [1/3], Step [1482/1618], Loss: 2.2801, Perplexity: 9.7777
Epoch [1/3], Step [1483/1618], Loss: 2.5448, Perplexity: 12.7406
Epoch [1/3], Step [1484/1618], Loss: 2.1878, Perplexity: 8.9153
Epoch [1/3], Step [1485/1618], Loss: 2.3888, Perplexity: 10.9001
Epoch [1/3], Step [1486/1618], Loss: 2.1937, Perplexity: 8.9685
Epoch [1/3], Step [1487/1618], Loss: 2.1228, Perplexity: 8.3546
Epoch [1/3], Step [1488/1618], Loss: 2.2352, Perplexity: 9.3482
Epoch [1/3], Step [1489/1618], Loss: 2.1280, Perplexity: 8.3981
Epoch [1/3], Step [1490/1618], Loss: 2.2005, Perplexity: 9.0298
Epoch [1/3], Step [1491/1618], Loss: 2.2455, Perplexity: 9.4455
Epoch [1/3], Step [1492/1618], Loss: 2.2563, Perplexity: 9.5476
Epoch [1/3], Step [1493/1618], Loss: 2.5686, Perplexity: 13.0477
Epoch [1/3], Step [1494/1618], Loss: 2.0932, Perplexity: 8.1110
Epoch [1/3], Step [1495/1618], Loss: 2.2742, Perplexity: 9.7203
Epoch [1/3], Step [1496/1618], Loss: 2.2560, Perplexity: 9.5446
Epoch [1/3], Step [1497/1618], Loss: 2.0250, Perplexity: 7.5758
Epoch [1/3], Step [1498/1618], Loss: 2.5257, Perplexity: 12.4997
Epoch [1/3], Step [1499/1618], Loss: 2.2023, Perplexity: 9.0461
Epoch [1/3], Step [1500/1618], Loss: 2.1902, Perplexity: 8.9366
Epoch [1/3], Step [1501/1618], Loss: 2.2250, Perplexity: 9.2531
Epoch [1/3], Step [1502/1618], Loss: 2.1866, Perplexity: 8.9049
Epoch [1/3], Step [1503/1618], Loss: 2.1712, Perplexity: 8.7687
Epoch [1/3], Step [1504/1618], Loss: 2.3068, Perplexity: 10.0423
Epoch [1/3], Step [1505/1618], Loss: 2.5425, Perplexity: 12.7118
Epoch [1/3], Step [1506/1618], Loss: 2.2226, Perplexity: 9.2311
Epoch [1/3], Step [1507/1618], Loss: 2.1947, Perplexity: 8.9775
Epoch [1/3], Step [1508/1618], Loss: 2.2831, Perplexity: 9.8071
Epoch [1/3], Step [1509/1618], Loss: 2.2530, Perplexity: 9.5158
Epoch [1/3], Step [1510/1618], Loss: 2.1503, Perplexity: 8.5874
Epoch [1/3], Step [1511/1618], Loss: 2.3077, Perplexity: 10.0515
Epoch [1/3], Step [1512/1618], Loss: 2.1230, Perplexity: 8.3562
Epoch [1/3], Step [1513/1618], Loss: 2.2271, Perplexity: 9.2728
Epoch [1/3], Step [1514/1618], Loss: 2.3320, Perplexity: 10.2988
Epoch [1/3], Step [1515/1618], Loss: 2.1139, Perplexity: 8.2803
Epoch [1/3], Step [1516/1618], Loss: 2.1590, Perplexity: 8.6625
Epoch [1/3], Step [1517/1618], Loss: 2.1682, Perplexity: 8.7429
Epoch [1/3], Step [1518/1618], Loss: 2.1680, Perplexity: 8.7406
Epoch [1/3], Step [1519/1618], Loss: 2.1014, Perplexity: 8.1779
Epoch [1/3], Step [1520/1618], Loss: 2.0164, Perplexity: 7.5112
Epoch [1/3], Step [1521/1618], Loss: 2.1719, Perplexity: 8.7751
Epoch [1/3], Step [1522/1618], Loss: 2.6573, Perplexity: 14.2581
Epoch [1/3], Step [1523/1618], Loss: 2.1245, Perplexity: 8.3687
Epoch [1/3], Step [1524/1618], Loss: 2.2634, Perplexity: 9.6158
Epoch [1/3], Step [1525/1618], Loss: 2.1718, Perplexity: 8.7744
Epoch [1/3], Step [1526/1618], Loss: 2.1705, Perplexity: 8.7629
Epoch [1/3], Step [1527/1618], Loss: 2.1864, Perplexity: 8.9029
Epoch [1/3], Step [1528/1618], Loss: 2.1561, Perplexity: 8.6371
Epoch [1/3], Step [1529/1618], Loss: 2.7142, Perplexity: 15.0922
Epoch [1/3], Step [1530/1618], Loss: 2.1865, Perplexity: 8.9037
Epoch [1/3], Step [1531/1618], Loss: 2.3038, Perplexity: 10.0121
Epoch [1/3], Step [1532/1618], Loss: 2.0963, Perplexity: 8.1362
Epoch [1/3], Step [1533/1618], Loss: 2.5280, Perplexity: 12.5284
Epoch [1/3], Step [1534/1618], Loss: 2.4900, Perplexity: 12.0618
Epoch [1/3], Step [1535/1618], Loss: 2.4216, Perplexity: 11.2637
Epoch [1/3], Step [1536/1618], Loss: 2.3883, Perplexity: 10.8953
Epoch [1/3], Step [1537/1618], Loss: 2.1379, Perplexity: 8.4819
Epoch [1/3], Step [1538/1618], Loss: 2.1546, Perplexity: 8.6248
Epoch [1/3], Step [1539/1618], Loss: 2.1906, Perplexity: 8.9405
Epoch [1/3], Step [1540/1618], Loss: 2.3911, Perplexity: 10.9260
Epoch [1/3], Step [1541/1618], Loss: 2.2862, Perplexity: 9.8371
Epoch [1/3], Step [1542/1618], Loss: 2.3300, Perplexity: 10.2775
Epoch [1/3], Step [1543/1618], Loss: 2.1000, Perplexity: 8.1664
Epoch [1/3], Step [1544/1618], Loss: 2.1952, Perplexity: 8.9820
Epoch [1/3], Step [1545/1618], Loss: 2.1102, Perplexity: 8.2495
Epoch [1/3], Step [1546/1618], Loss: 2.2248, Perplexity: 9.2514
Epoch [1/3], Step [1547/1618], Loss: 2.0744, Perplexity: 7.9601
Epoch [1/3], Step [1548/1618], Loss: 2.4482, Perplexity: 11.5672
Epoch [1/3], Step [1549/1618], Loss: 2.3403, Perplexity: 10.3843
Epoch [1/3], Step [1550/1618], Loss: 2.1333, Perplexity: 8.4429
Epoch [1/3], Step [1551/1618], Loss: 2.3672, Perplexity: 10.6676
Epoch [1/3], Step [1552/1618], Loss: 2.0638, Perplexity: 7.8757
Epoch [1/3], Step [1553/1618], Loss: 2.2265, Perplexity: 9.2675
Epoch [1/3], Step [1554/1618], Loss: 2.4940, Perplexity: 12.1092
Epoch [1/3], Step [1555/1618], Loss: 2.2195, Perplexity: 9.2025
Epoch [1/3], Step [1556/1618], Loss: 2.1821, Perplexity: 8.8649
Epoch [1/3], Step [1557/1618], Loss: 3.0262, Perplexity: 20.6178
Epoch [1/3], Step [1558/1618], Loss: 2.2160, Perplexity: 9.1706
Epoch [1/3], Step [1559/1618], Loss: 2.1328, Perplexity: 8.4385
Epoch [1/3], Step [1560/1618], Loss: 2.2958, Perplexity: 9.9323
Epoch [1/3], Step [1561/1618], Loss: 2.2337, Perplexity: 9.3341
Epoch [1/3], Step [1562/1618], Loss: 2.5397, Perplexity: 12.6759
Epoch [1/3], Step [1563/1618], Loss: 2.1803, Perplexity: 8.8490
Epoch [1/3], Step [1564/1618], Loss: 2.3401, Perplexity: 10.3822
Epoch [1/3], Step [1565/1618], Loss: 2.3225, Perplexity: 10.2010
Epoch [1/3], Step [1566/1618], Loss: 2.2148, Perplexity: 9.1593
Epoch [1/3], Step [1567/1618], Loss: 2.2405, Perplexity: 9.3981
Epoch [1/3], Step [1568/1618], Loss: 2.3749, Perplexity: 10.7501
Epoch [1/3], Step [1569/1618], Loss: 2.0710, Perplexity: 7.9331
Epoch [1/3], Step [1570/1618], Loss: 2.4649, Perplexity: 11.7629
Epoch [1/3], Step [1571/1618], Loss: 2.1176, Perplexity: 8.3115
Epoch [1/3], Step [1572/1618], Loss: 2.1784, Perplexity: 8.8323
Epoch [1/3], Step [1573/1618], Loss: 2.4756, Perplexity: 11.8890
Epoch [1/3], Step [1574/1618], Loss: 2.2235, Perplexity: 9.2399
Epoch [1/3], Step [1575/1618], Loss: 2.1034, Perplexity: 8.1941
Epoch [1/3], Step [1576/1618], Loss: 2.1527, Perplexity: 8.6079
Epoch [1/3], Step [1577/1618], Loss: 2.2076, Perplexity: 9.0939
Epoch [1/3], Step [1578/1618], Loss: 2.1382, Perplexity: 8.4842
Epoch [1/3], Step [1579/1618], Loss: 2.5194, Perplexity: 12.4217
Epoch [1/3], Step [1580/1618], Loss: 2.2603, Perplexity: 9.5864
Epoch [1/3], Step [1581/1618], Loss: 2.2112, Perplexity: 9.1269
Epoch [1/3], Step [1582/1618], Loss: 2.2603, Perplexity: 9.5862
Epoch [1/3], Step [1583/1618], Loss: 2.2806, Perplexity: 9.7822
Epoch [1/3], Step [1584/1618], Loss: 2.3209, Perplexity: 10.1852
Epoch [1/3], Step [1585/1618], Loss: 2.1384, Perplexity: 8.4857
Epoch [1/3], Step [1586/1618], Loss: 2.1984, Perplexity: 9.0101
Epoch [1/3], Step [1587/1618], Loss: 2.3664, Perplexity: 10.6590
Epoch [1/3], Step [1588/1618], Loss: 2.0860, Perplexity: 8.0524
Epoch [1/3], Step [1589/1618], Loss: 2.2111, Perplexity: 9.1254
Epoch [1/3], Step [1590/1618], Loss: 2.2930, Perplexity: 9.9042
Epoch [1/3], Step [1591/1618], Loss: 2.2459, Perplexity: 9.4489
Epoch [1/3], Step [1592/1618], Loss: 2.3289, Perplexity: 10.2665
Epoch [1/3], Step [1593/1618], Loss: 2.2131, Perplexity: 9.1440
Epoch [1/3], Step [1594/1618], Loss: 2.1491, Perplexity: 8.5776
Epoch [1/3], Step [1595/1618], Loss: 2.5933, Perplexity: 13.3738
Epoch [1/3], Step [1596/1618], Loss: 2.1846, Perplexity: 8.8873
Epoch [1/3], Step [1597/1618], Loss: 2.2560, Perplexity: 9.5445
Epoch [1/3], Step [1598/1618], Loss: 2.2166, Perplexity: 9.1764
Epoch [1/3], Step [1599/1618], Loss: 2.5689, Perplexity: 13.0513
Epoch [1/3], Step [1600/1618], Loss: 2.0894, Perplexity: 8.0803
Epoch [1/3], Step [1601/1618], Loss: 2.7451, Perplexity: 15.5668
Epoch [1/3], Step [1602/1618], Loss: 2.1238, Perplexity: 8.3629
Epoch [1/3], Step [1603/1618], Loss: 2.3046, Perplexity: 10.0205
Epoch [1/3], Step [1604/1618], Loss: 2.2327, Perplexity: 9.3254
Epoch [1/3], Step [1605/1618], Loss: 2.2429, Perplexity: 9.4204
Epoch [1/3], Step [1606/1618], Loss: 2.1120, Perplexity: 8.2646
Epoch [1/3], Step [1607/1618], Loss: 2.1806, Perplexity: 8.8521
Epoch [1/3], Step [1608/1618], Loss: 2.1722, Perplexity: 8.7779
Epoch [1/3], Step [1609/1618], Loss: 2.1257, Perplexity: 8.3786
Epoch [1/3], Step [1610/1618], Loss: 2.0868, Perplexity: 8.0594
Epoch [1/3], Step [1611/1618], Loss: 2.2555, Perplexity: 9.5397
Epoch [1/3], Step [1612/1618], Loss: 2.2067, Perplexity: 9.0861
Epoch [1/3], Step [1613/1618], Loss: 2.1426, Perplexity: 8.5216
Epoch [1/3], Step [1614/1618], Loss: 2.2133, Perplexity: 9.1462
Epoch [1/3], Step [1615/1618], Loss: 2.4265, Perplexity: 11.3190
Epoch [1/3], Step [1616/1618], Loss: 2.1291, Perplexity: 8.4070
Epoch [1/3], Step [1617/1618], Loss: 2.0765, Perplexity: 7.9765
Epoch [1/3], Step [1618/1618], Loss: 2.1828, Perplexity: 8.8712
Epoch [2/3], Step [1/1618], Loss: 2.1007, Perplexity: 8.1719
Epoch [2/3], Step [2/1618], Loss: 2.0248, Perplexity: 7.5748
Epoch [2/3], Step [3/1618], Loss: 2.2198, Perplexity: 9.2058
Epoch [2/3], Step [4/1618], Loss: 2.4095, Perplexity: 11.1280
Epoch [2/3], Step [5/1618], Loss: 2.1128, Perplexity: 8.2714
Epoch [2/3], Step [6/1618], Loss: 2.0756, Perplexity: 7.9695
Epoch [2/3], Step [7/1618], Loss: 2.1387, Perplexity: 8.4888
Epoch [2/3], Step [8/1618], Loss: 2.0865, Perplexity: 8.0564
Epoch [2/3], Step [9/1618], Loss: 2.0421, Perplexity: 7.7068
Epoch [2/3], Step [10/1618], Loss: 2.3296, Perplexity: 10.2738
Epoch [2/3], Step [11/1618], Loss: 2.1401, Perplexity: 8.4999
Epoch [2/3], Step [12/1618], Loss: 2.2525, Perplexity: 9.5118
Epoch [2/3], Step [13/1618], Loss: 2.2419, Perplexity: 9.4109
Epoch [2/3], Step [14/1618], Loss: 2.1759, Perplexity: 8.8105
Epoch [2/3], Step [15/1618], Loss: 2.2978, Perplexity: 9.9528
Epoch [2/3], Step [16/1618], Loss: 2.2102, Perplexity: 9.1174
Epoch [2/3], Step [17/1618], Loss: 2.2454, Perplexity: 9.4440
Epoch [2/3], Step [18/1618], Loss: 2.2057, Perplexity: 9.0762
Epoch [2/3], Step [19/1618], Loss: 2.2472, Perplexity: 9.4616
Epoch [2/3], Step [20/1618], Loss: 2.1436, Perplexity: 8.5301
Epoch [2/3], Step [21/1618], Loss: 2.3749, Perplexity: 10.7504
Epoch [2/3], Step [22/1618], Loss: 2.2436, Perplexity: 9.4276
Epoch [2/3], Step [23/1618], Loss: 2.1869, Perplexity: 8.9080
Epoch [2/3], Step [24/1618], Loss: 2.2529, Perplexity: 9.5157
Epoch [2/3], Step [25/1618], Loss: 2.0933, Perplexity: 8.1116
Epoch [2/3], Step [26/1618], Loss: 2.1057, Perplexity: 8.2131
Epoch [2/3], Step [27/1618], Loss: 2.1599, Perplexity: 8.6706
Epoch [2/3], Step [28/1618], Loss: 2.1365, Perplexity: 8.4694
Epoch [2/3], Step [29/1618], Loss: 2.1990, Perplexity: 9.0162
Epoch [2/3], Step [30/1618], Loss: 2.0783, Perplexity: 7.9906
Epoch [2/3], Step [31/1618], Loss: 2.1732, Perplexity: 8.7861
Epoch [2/3], Step [32/1618], Loss: 2.1362, Perplexity: 8.4675
Epoch [2/3], Step [33/1618], Loss: 2.3582, Perplexity: 10.5722
Epoch [2/3], Step [34/1618], Loss: 2.7030, Perplexity: 14.9245
Epoch [2/3], Step [35/1618], Loss: 2.1551, Perplexity: 8.6288
Epoch [2/3], Step [36/1618], Loss: 2.2614, Perplexity: 9.5964
Epoch [2/3], Step [37/1618], Loss: 2.0800, Perplexity: 8.0048
Epoch [2/3], Step [38/1618], Loss: 2.1505, Perplexity: 8.5890
Epoch [2/3], Step [39/1618], Loss: 2.0353, Perplexity: 7.6546
Epoch [2/3], Step [40/1618], Loss: 2.1279, Perplexity: 8.3968
Epoch [2/3], Step [41/1618], Loss: 2.3181, Perplexity: 10.1569
Epoch [2/3], Step [42/1618], Loss: 2.4488, Perplexity: 11.5743
Epoch [2/3], Step [43/1618], Loss: 2.3551, Perplexity: 10.5396
Epoch [2/3], Step [44/1618], Loss: 2.1227, Perplexity: 8.3538
Epoch [2/3], Step [45/1618], Loss: 2.1472, Perplexity: 8.5608
Epoch [2/3], Step [46/1618], Loss: 2.1109, Perplexity: 8.2560
Epoch [2/3], Step [47/1618], Loss: 2.2261, Perplexity: 9.2637
Epoch [2/3], Step [48/1618], Loss: 2.1367, Perplexity: 8.4713
Epoch [2/3], Step [49/1618], Loss: 2.1591, Perplexity: 8.6630
Epoch [2/3], Step [50/1618], Loss: 2.4735, Perplexity: 11.8645
Epoch [2/3], Step [51/1618], Loss: 2.2586, Perplexity: 9.5699
Epoch [2/3], Step [52/1618], Loss: 2.1460, Perplexity: 8.5504
Epoch [2/3], Step [53/1618], Loss: 2.1028, Perplexity: 8.1888
Epoch [2/3], Step [54/1618], Loss: 2.1487, Perplexity: 8.5737
Epoch [2/3], Step [55/1618], Loss: 2.2860, Perplexity: 9.8352
Epoch [2/3], Step [56/1618], Loss: 2.2719, Perplexity: 9.6977
Epoch [2/3], Step [57/1618], Loss: 2.1524, Perplexity: 8.6056
Epoch [2/3], Step [58/1618], Loss: 2.2811, Perplexity: 9.7870
Epoch [2/3], Step [59/1618], Loss: 2.0413, Perplexity: 7.7007
Epoch [2/3], Step [60/1618], Loss: 2.0956, Perplexity: 8.1302
Epoch [2/3], Step [61/1618], Loss: 2.1386, Perplexity: 8.4873
Epoch [2/3], Step [62/1618], Loss: 2.1571, Perplexity: 8.6457
Epoch [2/3], Step [63/1618], Loss: 2.1004, Perplexity: 8.1696
Epoch [2/3], Step [64/1618], Loss: 2.1851, Perplexity: 8.8913
Epoch [2/3], Step [65/1618], Loss: 2.1799, Perplexity: 8.8455
Epoch [2/3], Step [66/1618], Loss: 2.2245, Perplexity: 9.2489
Epoch [2/3], Step [67/1618], Loss: 2.1483, Perplexity: 8.5700
Epoch [2/3], Step [68/1618], Loss: 2.8186, Perplexity: 16.7528
Epoch [2/3], Step [69/1618], Loss: 2.1413, Perplexity: 8.5103
Epoch [2/3], Step [70/1618], Loss: 2.6640, Perplexity: 14.3531
Epoch [2/3], Step [71/1618], Loss: 2.2777, Perplexity: 9.7545
Epoch [2/3], Step [72/1618], Loss: 2.0803, Perplexity: 8.0065
Epoch [2/3], Step [73/1618], Loss: 2.1361, Perplexity: 8.4666
Epoch [2/3], Step [74/1618], Loss: 2.1992, Perplexity: 9.0175
Epoch [2/3], Step [75/1618], Loss: 2.2189, Perplexity: 9.1971
Epoch [2/3], Step [76/1618], Loss: 2.2193, Perplexity: 9.2005
Epoch [2/3], Step [77/1618], Loss: 2.1837, Perplexity: 8.8791
Epoch [2/3], Step [78/1618], Loss: 2.2960, Perplexity: 9.9340
Epoch [2/3], Step [79/1618], Loss: 2.4922, Perplexity: 12.0873
Epoch [2/3], Step [80/1618], Loss: 2.3367, Perplexity: 10.3468
Epoch [2/3], Step [81/1618], Loss: 2.0132, Perplexity: 7.4874
Epoch [2/3], Step [82/1618], Loss: 2.1124, Perplexity: 8.2679
Epoch [2/3], Step [83/1618], Loss: 2.1940, Perplexity: 8.9713
Epoch [2/3], Step [84/1618], Loss: 2.0884, Perplexity: 8.0722
Epoch [2/3], Step [85/1618], Loss: 2.2515, Perplexity: 9.5023
Epoch [2/3], Step [86/1618], Loss: 2.2383, Perplexity: 9.3772
Epoch [2/3], Step [87/1618], Loss: 2.1515, Perplexity: 8.5980
Epoch [2/3], Step [88/1618], Loss: 2.0817, Perplexity: 8.0180
Epoch [2/3], Step [89/1618], Loss: 2.1849, Perplexity: 8.8894
Epoch [2/3], Step [90/1618], Loss: 2.1524, Perplexity: 8.6053
Epoch [2/3], Step [91/1618], Loss: 2.1451, Perplexity: 8.5429
Epoch [2/3], Step [92/1618], Loss: 2.0782, Perplexity: 7.9900
Epoch [2/3], Step [93/1618], Loss: 2.1654, Perplexity: 8.7179
Epoch [2/3], Step [94/1618], Loss: 2.0640, Perplexity: 7.8775
Epoch [2/3], Step [95/1618], Loss: 2.1286, Perplexity: 8.4032
Epoch [2/3], Step [96/1618], Loss: 2.4137, Perplexity: 11.1754
Epoch [2/3], Step [97/1618], Loss: 2.3350, Perplexity: 10.3294
Epoch [2/3], Step [98/1618], Loss: 2.4345, Perplexity: 11.4103
Epoch [2/3], Step [99/1618], Loss: 2.5623, Perplexity: 12.9655
Epoch [2/3], Step [100/1618], Loss: 2.2361, Perplexity: 9.3571
Epoch [2/3], Step [101/1618], Loss: 2.2161, Perplexity: 9.1713
Epoch [2/3], Step [102/1618], Loss: 2.1590, Perplexity: 8.6628
Epoch [2/3], Step [103/1618], Loss: 2.1705, Perplexity: 8.7623
Epoch [2/3], Step [104/1618], Loss: 2.3501, Perplexity: 10.4865
Epoch [2/3], Step [105/1618], Loss: 3.0789, Perplexity: 21.7339
Epoch [2/3], Step [106/1618], Loss: 2.3223, Perplexity: 10.1996
Epoch [2/3], Step [107/1618], Loss: 2.1682, Perplexity: 8.7424
Epoch [2/3], Step [108/1618], Loss: 2.1221, Perplexity: 8.3484
Epoch [2/3], Step [109/1618], Loss: 2.2065, Perplexity: 9.0843
Epoch [2/3], Step [110/1618], Loss: 2.2225, Perplexity: 9.2307
Epoch [2/3], Step [111/1618], Loss: 2.5858, Perplexity: 13.2741
Epoch [2/3], Step [112/1618], Loss: 2.1802, Perplexity: 8.8476
Epoch [2/3], Step [113/1618], Loss: 2.2221, Perplexity: 9.2267
Epoch [2/3], Step [114/1618], Loss: 2.4420, Perplexity: 11.4955
Epoch [2/3], Step [115/1618], Loss: 2.3715, Perplexity: 10.7134
Epoch [2/3], Step [116/1618], Loss: 2.1908, Perplexity: 8.9428
Epoch [2/3], Step [117/1618], Loss: 2.1695, Perplexity: 8.7541
Epoch [2/3], Step [118/1618], Loss: 2.2681, Perplexity: 9.6607
Epoch [2/3], Step [119/1618], Loss: 2.1781, Perplexity: 8.8294
Epoch [2/3], Step [120/1618], Loss: 2.3534, Perplexity: 10.5208
Epoch [2/3], Step [121/1618], Loss: 2.7952, Perplexity: 16.3664
Epoch [2/3], Step [122/1618], Loss: 2.5080, Perplexity: 12.2808
Epoch [2/3], Step [123/1618], Loss: 2.3387, Perplexity: 10.3677
Epoch [2/3], Step [124/1618], Loss: 2.2288, Perplexity: 9.2890
Epoch [2/3], Step [125/1618], Loss: 2.1278, Perplexity: 8.3962
Epoch [2/3], Step [126/1618], Loss: 2.1475, Perplexity: 8.5632
Epoch [2/3], Step [127/1618], Loss: 2.4608, Perplexity: 11.7145
Epoch [2/3], Step [128/1618], Loss: 2.2813, Perplexity: 9.7893
Epoch [2/3], Step [129/1618], Loss: 2.1484, Perplexity: 8.5713
Epoch [2/3], Step [130/1618], Loss: 2.8074, Perplexity: 16.5664
Epoch [2/3], Step [131/1618], Loss: 2.2016, Perplexity: 9.0396
Epoch [2/3], Step [132/1618], Loss: 2.1550, Perplexity: 8.6280
Epoch [2/3], Step [133/1618], Loss: 2.1433, Perplexity: 8.5279
Epoch [2/3], Step [134/1618], Loss: 2.1656, Perplexity: 8.7197
Epoch [2/3], Step [135/1618], Loss: 2.0880, Perplexity: 8.0690
Epoch [2/3], Step [136/1618], Loss: 2.0971, Perplexity: 8.1427
Epoch [2/3], Step [137/1618], Loss: 2.1421, Perplexity: 8.5173
Epoch [2/3], Step [138/1618], Loss: 2.1782, Perplexity: 8.8300
Epoch [2/3], Step [139/1618], Loss: 2.0655, Perplexity: 7.8889
Epoch [2/3], Step [140/1618], Loss: 2.1340, Perplexity: 8.4482
Epoch [2/3], Step [141/1618], Loss: 2.1138, Perplexity: 8.2794
Epoch [2/3], Step [142/1618], Loss: 2.2745, Perplexity: 9.7230
Epoch [2/3], Step [143/1618], Loss: 2.3868, Perplexity: 10.8785
Epoch [2/3], Step [144/1618], Loss: 2.1712, Perplexity: 8.7684
Epoch [2/3], Step [145/1618], Loss: 2.2534, Perplexity: 9.5201
Epoch [2/3], Step [146/1618], Loss: 2.1893, Perplexity: 8.9290
Epoch [2/3], Step [147/1618], Loss: 2.1319, Perplexity: 8.4308
Epoch [2/3], Step [148/1618], Loss: 2.4176, Perplexity: 11.2189
Epoch [2/3], Step [149/1618], Loss: 2.4994, Perplexity: 12.1758
Epoch [2/3], Step [150/1618], Loss: 2.3463, Perplexity: 10.4473
Epoch [2/3], Step [151/1618], Loss: 2.2329, Perplexity: 9.3270
Epoch [2/3], Step [152/1618], Loss: 2.0659, Perplexity: 7.8924
Epoch [2/3], Step [153/1618], Loss: 2.0719, Perplexity: 7.9399
Epoch [2/3], Step [154/1618], Loss: 2.2668, Perplexity: 9.6487
Epoch [2/3], Step [155/1618], Loss: 2.2161, Perplexity: 9.1713
Epoch [2/3], Step [156/1618], Loss: 2.1565, Perplexity: 8.6408
Epoch [2/3], Step [157/1618], Loss: 2.2336, Perplexity: 9.3336
Epoch [2/3], Step [158/1618], Loss: 2.1152, Perplexity: 8.2912
Epoch [2/3], Step [159/1618], Loss: 2.1489, Perplexity: 8.5754
Epoch [2/3], Step [160/1618], Loss: 2.2603, Perplexity: 9.5857
Epoch [2/3], Step [161/1618], Loss: 2.2115, Perplexity: 9.1291
Epoch [2/3], Step [162/1618], Loss: 2.8593, Perplexity: 17.4486
Epoch [2/3], Step [163/1618], Loss: 2.0753, Perplexity: 7.9672
Epoch [2/3], Step [164/1618], Loss: 2.2314, Perplexity: 9.3132
Epoch [2/3], Step [165/1618], Loss: 2.1848, Perplexity: 8.8889
Epoch [2/3], Step [166/1618], Loss: 2.1929, Perplexity: 8.9616
Epoch [2/3], Step [167/1618], Loss: 2.1379, Perplexity: 8.4818
Epoch [2/3], Step [168/1618], Loss: 2.0265, Perplexity: 7.5878
Epoch [2/3], Step [169/1618], Loss: 2.2447, Perplexity: 9.4375
Epoch [2/3], Step [170/1618], Loss: 2.1935, Perplexity: 8.9661
Epoch [2/3], Step [171/1618], Loss: 2.2466, Perplexity: 9.4554
Epoch [2/3], Step [172/1618], Loss: 2.2271, Perplexity: 9.2726
Epoch [2/3], Step [173/1618], Loss: 2.1925, Perplexity: 8.9574
Epoch [2/3], Step [174/1618], Loss: 2.2699, Perplexity: 9.6785
Epoch [2/3], Step [175/1618], Loss: 2.3556, Perplexity: 10.5442
Epoch [2/3], Step [176/1618], Loss: 2.1838, Perplexity: 8.8799
Epoch [2/3], Step [177/1618], Loss: 2.1904, Perplexity: 8.9384
Epoch [2/3], Step [178/1618], Loss: 3.0059, Perplexity: 20.2036
Epoch [2/3], Step [179/1618], Loss: 2.1793, Perplexity: 8.8405
Epoch [2/3], Step [180/1618], Loss: 2.1760, Perplexity: 8.8109
Epoch [2/3], Step [181/1618], Loss: 2.0389, Perplexity: 7.6821
Epoch [2/3], Step [182/1618], Loss: 2.1508, Perplexity: 8.5918
Epoch [2/3], Step [183/1618], Loss: 2.5068, Perplexity: 12.2658
Epoch [2/3], Step [184/1618], Loss: 2.2868, Perplexity: 9.8438
Epoch [2/3], Step [185/1618], Loss: 2.0224, Perplexity: 7.5567
Epoch [2/3], Step [186/1618], Loss: 2.1190, Perplexity: 8.3226
Epoch [2/3], Step [187/1618], Loss: 2.1801, Perplexity: 8.8473
Epoch [2/3], Step [188/1618], Loss: 2.1479, Perplexity: 8.5667
Epoch [2/3], Step [189/1618], Loss: 2.6493, Perplexity: 14.1439
Epoch [2/3], Step [190/1618], Loss: 2.3327, Perplexity: 10.3060
Epoch [2/3], Step [191/1618], Loss: 2.2853, Perplexity: 9.8284
Epoch [2/3], Step [192/1618], Loss: 2.0768, Perplexity: 7.9791
Epoch [2/3], Step [193/1618], Loss: 2.1826, Perplexity: 8.8697
Epoch [2/3], Step [194/1618], Loss: 2.0819, Perplexity: 8.0198
Epoch [2/3], Step [195/1618], Loss: 2.2097, Perplexity: 9.1128
Epoch [2/3], Step [196/1618], Loss: 2.0754, Perplexity: 7.9677
Epoch [2/3], Step [197/1618], Loss: 3.3928, Perplexity: 29.7480
Epoch [2/3], Step [198/1618], Loss: 2.2023, Perplexity: 9.0460
Epoch [2/3], Step [199/1618], Loss: 2.1626, Perplexity: 8.6940
Epoch [2/3], Step [200/1618], Loss: 2.5696, Perplexity: 13.0612
Epoch [2/3], Step [201/1618], Loss: 2.1716, Perplexity: 8.7726
Epoch [2/3], Step [202/1618], Loss: 2.1195, Perplexity: 8.3267
Epoch [2/3], Step [203/1618], Loss: 2.1144, Perplexity: 8.2849
Epoch [2/3], Step [204/1618], Loss: 2.1251, Perplexity: 8.3737
Epoch [2/3], Step [205/1618], Loss: 2.3213, Perplexity: 10.1889
Epoch [2/3], Step [206/1618], Loss: 2.1296, Perplexity: 8.4112
Epoch [2/3], Step [207/1618], Loss: 2.3507, Perplexity: 10.4934
Epoch [2/3], Step [208/1618], Loss: 2.1437, Perplexity: 8.5311
Epoch [2/3], Step [209/1618], Loss: 2.1603, Perplexity: 8.6738
Epoch [2/3], Step [210/1618], Loss: 2.2438, Perplexity: 9.4291
Epoch [2/3], Step [211/1618], Loss: 2.1304, Perplexity: 8.4178
Epoch [2/3], Step [212/1618], Loss: 2.3030, Perplexity: 10.0044
Epoch [2/3], Step [213/1618], Loss: 2.2163, Perplexity: 9.1730
Epoch [2/3], Step [214/1618], Loss: 2.2623, Perplexity: 9.6054
Epoch [2/3], Step [215/1618], Loss: 2.6478, Perplexity: 14.1231
Epoch [2/3], Step [216/1618], Loss: 2.1271, Perplexity: 8.3906
Epoch [2/3], Step [217/1618], Loss: 2.1693, Perplexity: 8.7526
Epoch [2/3], Step [218/1618], Loss: 2.1818, Perplexity: 8.8623
Epoch [2/3], Step [219/1618], Loss: 2.1389, Perplexity: 8.4903
Epoch [2/3], Step [220/1618], Loss: 2.3313, Perplexity: 10.2910
Epoch [2/3], Step [221/1618], Loss: 2.0453, Perplexity: 7.7316
Epoch [2/3], Step [222/1618], Loss: 2.0915, Perplexity: 8.0969
Epoch [2/3], Step [223/1618], Loss: 2.1125, Perplexity: 8.2691
Epoch [2/3], Step [224/1618], Loss: 2.0222, Perplexity: 7.5553
Epoch [2/3], Step [225/1618], Loss: 2.0869, Perplexity: 8.0596
Epoch [2/3], Step [226/1618], Loss: 2.0692, Perplexity: 7.9188
Epoch [2/3], Step [227/1618], Loss: 2.1992, Perplexity: 9.0178
Epoch [2/3], Step [228/1618], Loss: 2.4184, Perplexity: 11.2284
Epoch [2/3], Step [229/1618], Loss: 2.3666, Perplexity: 10.6607
Epoch [2/3], Step [230/1618], Loss: 2.1711, Perplexity: 8.7683
Epoch [2/3], Step [231/1618], Loss: 2.0822, Perplexity: 8.0223
Epoch [2/3], Step [232/1618], Loss: 2.3251, Perplexity: 10.2276
Epoch [2/3], Step [233/1618], Loss: 2.0906, Perplexity: 8.0895
Epoch [2/3], Step [234/1618], Loss: 2.0917, Perplexity: 8.0990
Epoch [2/3], Step [235/1618], Loss: 2.1154, Perplexity: 8.2933
Epoch [2/3], Step [236/1618], Loss: 2.1867, Perplexity: 8.9054
Epoch [2/3], Step [237/1618], Loss: 2.1496, Perplexity: 8.5817
Epoch [2/3], Step [238/1618], Loss: 2.1700, Perplexity: 8.7579
Epoch [2/3], Step [239/1618], Loss: 2.1783, Perplexity: 8.8313
Epoch [2/3], Step [240/1618], Loss: 2.1223, Perplexity: 8.3501
Epoch [2/3], Step [241/1618], Loss: 2.1765, Perplexity: 8.8154
Epoch [2/3], Step [242/1618], Loss: 2.1304, Perplexity: 8.4185
Epoch [2/3], Step [243/1618], Loss: 2.0014, Perplexity: 7.3994
Epoch [2/3], Step [244/1618], Loss: 2.0395, Perplexity: 7.6865
Epoch [2/3], Step [245/1618], Loss: 2.0672, Perplexity: 7.9026
Epoch [2/3], Step [246/1618], Loss: 2.1435, Perplexity: 8.5295
Epoch [2/3], Step [247/1618], Loss: 2.0710, Perplexity: 7.9331
Epoch [2/3], Step [248/1618], Loss: 2.1427, Perplexity: 8.5220
Epoch [2/3], Step [249/1618], Loss: 2.1025, Perplexity: 8.1864
Epoch [2/3], Step [250/1618], Loss: 2.0584, Perplexity: 7.8333
Epoch [2/3], Step [251/1618], Loss: 2.0841, Perplexity: 8.0376
Epoch [2/3], Step [252/1618], Loss: 2.0770, Perplexity: 7.9805
Epoch [2/3], Step [253/1618], Loss: 2.6654, Perplexity: 14.3734
Epoch [2/3], Step [254/1618], Loss: 2.1159, Perplexity: 8.2972
Epoch [2/3], Step [255/1618], Loss: 2.2612, Perplexity: 9.5943
Epoch [2/3], Step [256/1618], Loss: 2.2037, Perplexity: 9.0584
Epoch [2/3], Step [257/1618], Loss: 2.1460, Perplexity: 8.5502
Epoch [2/3], Step [258/1618], Loss: 2.1769, Perplexity: 8.8191
Epoch [2/3], Step [259/1618], Loss: 2.0953, Perplexity: 8.1283
Epoch [2/3], Step [260/1618], Loss: 2.0463, Perplexity: 7.7393
Epoch [2/3], Step [261/1618], Loss: 2.0519, Perplexity: 7.7824
Epoch [2/3], Step [262/1618], Loss: 2.2234, Perplexity: 9.2383
Epoch [2/3], Step [263/1618], Loss: 2.1543, Perplexity: 8.6221
Epoch [2/3], Step [264/1618], Loss: 2.0478, Perplexity: 7.7507
Epoch [2/3], Step [265/1618], Loss: 2.1725, Perplexity: 8.7801
Epoch [2/3], Step [266/1618], Loss: 2.1590, Perplexity: 8.6626
Epoch [2/3], Step [267/1618], Loss: 2.0673, Perplexity: 7.9036
Epoch [2/3], Step [268/1618], Loss: 2.3177, Perplexity: 10.1527
Epoch [2/3], Step [269/1618], Loss: 2.1342, Perplexity: 8.4507
Epoch [2/3], Step [270/1618], Loss: 2.5277, Perplexity: 12.5247
Epoch [2/3], Step [271/1618], Loss: 2.1934, Perplexity: 8.9660
Epoch [2/3], Step [272/1618], Loss: 2.2267, Perplexity: 9.2692
Epoch [2/3], Step [273/1618], Loss: 2.0341, Perplexity: 7.6450
Epoch [2/3], Step [274/1618], Loss: 2.0599, Perplexity: 7.8454
Epoch [2/3], Step [275/1618], Loss: 2.1105, Perplexity: 8.2522
Epoch [2/3], Step [276/1618], Loss: 2.1636, Perplexity: 8.7024
Epoch [2/3], Step [277/1618], Loss: 2.2254, Perplexity: 9.2572
Epoch [2/3], Step [278/1618], Loss: 2.1163, Perplexity: 8.3003
Epoch [2/3], Step [279/1618], Loss: 2.1559, Perplexity: 8.6354
Epoch [2/3], Step [280/1618], Loss: 2.1512, Perplexity: 8.5953
Epoch [2/3], Step [281/1618], Loss: 2.1634, Perplexity: 8.7007
Epoch [2/3], Step [282/1618], Loss: 2.0897, Perplexity: 8.0822
Epoch [2/3], Step [283/1618], Loss: 2.4362, Perplexity: 11.4295
Epoch [2/3], Step [284/1618], Loss: 2.2459, Perplexity: 9.4487
Epoch [2/3], Step [285/1618], Loss: 2.1095, Perplexity: 8.2442
Epoch [2/3], Step [286/1618], Loss: 2.2434, Perplexity: 9.4253
Epoch [2/3], Step [287/1618], Loss: 2.1853, Perplexity: 8.8935
Epoch [2/3], Step [288/1618], Loss: 2.0440, Perplexity: 7.7211
Epoch [2/3], Step [289/1618], Loss: 2.1039, Perplexity: 8.1983
Epoch [2/3], Step [290/1618], Loss: 2.0841, Perplexity: 8.0377
Epoch [2/3], Step [291/1618], Loss: 2.2196, Perplexity: 9.2035
Epoch [2/3], Step [292/1618], Loss: 2.1109, Perplexity: 8.2559
Epoch [2/3], Step [293/1618], Loss: 2.2830, Perplexity: 9.8062
Epoch [2/3], Step [294/1618], Loss: 2.4776, Perplexity: 11.9121
Epoch [2/3], Step [295/1618], Loss: 2.2715, Perplexity: 9.6940
Epoch [2/3], Step [296/1618], Loss: 2.0642, Perplexity: 7.8791
Epoch [2/3], Step [297/1618], Loss: 2.0092, Perplexity: 7.4577
Epoch [2/3], Step [298/1618], Loss: 2.1127, Perplexity: 8.2704
Epoch [2/3], Step [299/1618], Loss: 2.2463, Perplexity: 9.4529
Epoch [2/3], Step [300/1618], Loss: 2.5974, Perplexity: 13.4284
Epoch [2/3], Step [301/1618], Loss: 2.0831, Perplexity: 8.0297
Epoch [2/3], Step [302/1618], Loss: 2.6518, Perplexity: 14.1796
Epoch [2/3], Step [303/1618], Loss: 2.4955, Perplexity: 12.1278
Epoch [2/3], Step [304/1618], Loss: 2.1215, Perplexity: 8.3433
Epoch [2/3], Step [305/1618], Loss: 2.2854, Perplexity: 9.8300
Epoch [2/3], Step [306/1618], Loss: 2.1448, Perplexity: 8.5407
Epoch [2/3], Step [307/1618], Loss: 2.1077, Perplexity: 8.2295
Epoch [2/3], Step [308/1618], Loss: 2.5805, Perplexity: 13.2044
Epoch [2/3], Step [309/1618], Loss: 2.0729, Perplexity: 7.9480
Epoch [2/3], Step [310/1618], Loss: 2.1902, Perplexity: 8.9366
Epoch [2/3], Step [311/1618], Loss: 2.4318, Perplexity: 11.3793
Epoch [2/3], Step [312/1618], Loss: 2.2376, Perplexity: 9.3704
Epoch [2/3], Step [313/1618], Loss: 2.1625, Perplexity: 8.6931
Epoch [2/3], Step [314/1618], Loss: 2.1366, Perplexity: 8.4710
Epoch [2/3], Step [315/1618], Loss: 2.1027, Perplexity: 8.1880
Epoch [2/3], Step [316/1618], Loss: 2.1090, Perplexity: 8.2402
Epoch [2/3], Step [317/1618], Loss: 2.3639, Perplexity: 10.6321
Epoch [2/3], Step [318/1618], Loss: 2.8753, Perplexity: 17.7315
Epoch [2/3], Step [319/1618], Loss: 2.1964, Perplexity: 8.9923
Epoch [2/3], Step [320/1618], Loss: 2.0795, Perplexity: 8.0008
Epoch [2/3], Step [321/1618], Loss: 1.9675, Perplexity: 7.1530
Epoch [2/3], Step [322/1618], Loss: 2.0434, Perplexity: 7.7170
Epoch [2/3], Step [323/1618], Loss: 2.0684, Perplexity: 7.9124
Epoch [2/3], Step [324/1618], Loss: 2.1563, Perplexity: 8.6390
Epoch [2/3], Step [325/1618], Loss: 2.1712, Perplexity: 8.7687
Epoch [2/3], Step [326/1618], Loss: 2.1946, Perplexity: 8.9764
Epoch [2/3], Step [327/1618], Loss: 2.1339, Perplexity: 8.4481
Epoch [2/3], Step [328/1618], Loss: 2.1327, Perplexity: 8.4379
Epoch [2/3], Step [329/1618], Loss: 2.5885, Perplexity: 13.3096
Epoch [2/3], Step [330/1618], Loss: 2.1106, Perplexity: 8.2533
Epoch [2/3], Step [331/1618], Loss: 2.6804, Perplexity: 14.5914
Epoch [2/3], Step [332/1618], Loss: 2.1252, Perplexity: 8.3750
Epoch [2/3], Step [333/1618], Loss: 2.5453, Perplexity: 12.7476
Epoch [2/3], Step [334/1618], Loss: 2.1292, Perplexity: 8.4082
Epoch [2/3], Step [335/1618], Loss: 2.3102, Perplexity: 10.0760
Epoch [2/3], Step [336/1618], Loss: 2.1227, Perplexity: 8.3534
Epoch [2/3], Step [337/1618], Loss: 2.2507, Perplexity: 9.4943
Epoch [2/3], Step [338/1618], Loss: 2.0879, Perplexity: 8.0678
Epoch [2/3], Step [339/1618], Loss: 2.3286, Perplexity: 10.2639
Epoch [2/3], Step [340/1618], Loss: 2.1100, Perplexity: 8.2483
Epoch [2/3], Step [341/1618], Loss: 2.1286, Perplexity: 8.4027
Epoch [2/3], Step [342/1618], Loss: 2.0848, Perplexity: 8.0427
Epoch [2/3], Step [343/1618], Loss: 2.1310, Perplexity: 8.4229
Epoch [2/3], Step [344/1618], Loss: 2.0457, Perplexity: 7.7345
Epoch [2/3], Step [345/1618], Loss: 2.1688, Perplexity: 8.7477
Epoch [2/3], Step [346/1618], Loss: 2.1341, Perplexity: 8.4499
Epoch [2/3], Step [347/1618], Loss: 2.1998, Perplexity: 9.0228
Epoch [2/3], Step [348/1618], Loss: 2.0185, Perplexity: 7.5273
Epoch [2/3], Step [349/1618], Loss: 2.0571, Perplexity: 7.8234
Epoch [2/3], Step [350/1618], Loss: 2.1334, Perplexity: 8.4439
Epoch [2/3], Step [351/1618], Loss: 2.4218, Perplexity: 11.2656
Epoch [2/3], Step [352/1618], Loss: 3.7762, Perplexity: 43.6519
Epoch [2/3], Step [353/1618], Loss: 2.0867, Perplexity: 8.0584
Epoch [2/3], Step [354/1618], Loss: 2.0889, Perplexity: 8.0763
Epoch [2/3], Step [355/1618], Loss: 2.0377, Perplexity: 7.6726
Epoch [2/3], Step [356/1618], Loss: 2.0166, Perplexity: 7.5130
Epoch [2/3], Step [357/1618], Loss: 2.1657, Perplexity: 8.7211
Epoch [2/3], Step [358/1618], Loss: 2.1216, Perplexity: 8.3442
Epoch [2/3], Step [359/1618], Loss: 2.2373, Perplexity: 9.3676
Epoch [2/3], Step [360/1618], Loss: 2.0978, Perplexity: 8.1478
Epoch [2/3], Step [361/1618], Loss: 2.1321, Perplexity: 8.4323
Epoch [2/3], Step [362/1618], Loss: 2.0911, Perplexity: 8.0937
Epoch [2/3], Step [363/1618], Loss: 2.0949, Perplexity: 8.1247
Epoch [2/3], Step [364/1618], Loss: 2.4762, Perplexity: 11.8966
Epoch [2/3], Step [365/1618], Loss: 2.2181, Perplexity: 9.1899
Epoch [2/3], Step [366/1618], Loss: 2.2696, Perplexity: 9.6758
Epoch [2/3], Step [367/1618], Loss: 2.0615, Perplexity: 7.8578
Epoch [2/3], Step [368/1618], Loss: 2.0459, Perplexity: 7.7365
Epoch [2/3], Step [369/1618], Loss: 2.1331, Perplexity: 8.4408
Epoch [2/3], Step [370/1618], Loss: 2.1710, Perplexity: 8.7666
Epoch [2/3], Step [371/1618], Loss: 2.2197, Perplexity: 9.2044
Epoch [2/3], Step [372/1618], Loss: 2.1128, Perplexity: 8.2711
Epoch [2/3], Step [373/1618], Loss: 2.0598, Perplexity: 7.8444
Epoch [2/3], Step [374/1618], Loss: 2.0695, Perplexity: 7.9209
Epoch [2/3], Step [375/1618], Loss: 2.0776, Perplexity: 7.9855
Epoch [2/3], Step [376/1618], Loss: 2.0915, Perplexity: 8.0974
Epoch [2/3], Step [377/1618], Loss: 1.9919, Perplexity: 7.3292
Epoch [2/3], Step [378/1618], Loss: 2.0763, Perplexity: 7.9748
Epoch [2/3], Step [379/1618], Loss: 2.5610, Perplexity: 12.9487
Epoch [2/3], Step [380/1618], Loss: 2.0219, Perplexity: 7.5523
Epoch [2/3], Step [381/1618], Loss: 2.4458, Perplexity: 11.5400
Epoch [2/3], Step [382/1618], Loss: 2.3310, Perplexity: 10.2882
Epoch [2/3], Step [383/1618], Loss: 2.2948, Perplexity: 9.9227
Epoch [2/3], Step [384/1618], Loss: 2.1702, Perplexity: 8.7602
Epoch [2/3], Step [385/1618], Loss: 2.3204, Perplexity: 10.1796
Epoch [2/3], Step [386/1618], Loss: 1.9989, Perplexity: 7.3807
Epoch [2/3], Step [387/1618], Loss: 2.1144, Perplexity: 8.2850
Epoch [2/3], Step [388/1618], Loss: 2.0309, Perplexity: 7.6208
Epoch [2/3], Step [389/1618], Loss: 1.9873, Perplexity: 7.2959
Epoch [2/3], Step [390/1618], Loss: 2.0665, Perplexity: 7.8969
Epoch [2/3], Step [391/1618], Loss: 2.0143, Perplexity: 7.4954
Epoch [2/3], Step [392/1618], Loss: 1.9480, Perplexity: 7.0143
Epoch [2/3], Step [393/1618], Loss: 2.0479, Perplexity: 7.7514
Epoch [2/3], Step [394/1618], Loss: 2.2060, Perplexity: 9.0794
Epoch [2/3], Step [395/1618], Loss: 2.2276, Perplexity: 9.2773
Epoch [2/3], Step [396/1618], Loss: 1.9643, Perplexity: 7.1302
Epoch [2/3], Step [397/1618], Loss: 2.1556, Perplexity: 8.6334
Epoch [2/3], Step [398/1618], Loss: 2.2465, Perplexity: 9.4548
Epoch [2/3], Step [399/1618], Loss: 2.3088, Perplexity: 10.0628
Epoch [2/3], Step [400/1618], Loss: 2.1472, Perplexity: 8.5610
Epoch [2/3], Step [401/1618], Loss: 2.1810, Perplexity: 8.8550
Epoch [2/3], Step [402/1618], Loss: 1.9999, Perplexity: 7.3885
Epoch [2/3], Step [403/1618], Loss: 2.1988, Perplexity: 9.0143
Epoch [2/3], Step [404/1618], Loss: 1.9666, Perplexity: 7.1464
Epoch [2/3], Step [405/1618], Loss: 2.1141, Perplexity: 8.2820
Epoch [2/3], Step [406/1618], Loss: 2.1435, Perplexity: 8.5292
Epoch [2/3], Step [407/1618], Loss: 2.0184, Perplexity: 7.5262
Epoch [2/3], Step [408/1618], Loss: 2.1376, Perplexity: 8.4788
Epoch [2/3], Step [409/1618], Loss: 2.3042, Perplexity: 10.0162
Epoch [2/3], Step [410/1618], Loss: 2.1028, Perplexity: 8.1894
Epoch [2/3], Step [411/1618], Loss: 2.1958, Perplexity: 8.9873
Epoch [2/3], Step [412/1618], Loss: 2.0234, Perplexity: 7.5641
Epoch [2/3], Step [413/1618], Loss: 2.0698, Perplexity: 7.9231
Epoch [2/3], Step [414/1618], Loss: 2.2240, Perplexity: 9.2440
Epoch [2/3], Step [415/1618], Loss: 2.1444, Perplexity: 8.5370
Epoch [2/3], Step [416/1618], Loss: 1.9901, Perplexity: 7.3165
Epoch [2/3], Step [417/1618], Loss: 2.0664, Perplexity: 7.8962
Epoch [2/3], Step [418/1618], Loss: 2.1657, Perplexity: 8.7210
Epoch [2/3], Step [419/1618], Loss: 2.2810, Perplexity: 9.7863
Epoch [2/3], Step [420/1618], Loss: 2.0739, Perplexity: 7.9554
Epoch [2/3], Step [421/1618], Loss: 2.0974, Perplexity: 8.1450
Epoch [2/3], Step [422/1618], Loss: 2.0131, Perplexity: 7.4861
Epoch [2/3], Step [423/1618], Loss: 2.5917, Perplexity: 13.3520
Epoch [2/3], Step [424/1618], Loss: 2.1137, Perplexity: 8.2788
Epoch [2/3], Step [425/1618], Loss: 2.3011, Perplexity: 9.9848
Epoch [2/3], Step [426/1618], Loss: 2.1846, Perplexity: 8.8871
Epoch [2/3], Step [427/1618], Loss: 2.4579, Perplexity: 11.6800
Epoch [2/3], Step [428/1618], Loss: 2.1047, Perplexity: 8.2045
Epoch [2/3], Step [429/1618], Loss: 2.0955, Perplexity: 8.1297
Epoch [2/3], Step [430/1618], Loss: 2.2233, Perplexity: 9.2378
Epoch [2/3], Step [431/1618], Loss: 2.1469, Perplexity: 8.5585
Epoch [2/3], Step [432/1618], Loss: 2.0866, Perplexity: 8.0579
Epoch [2/3], Step [433/1618], Loss: 2.0318, Perplexity: 7.6280
Epoch [2/3], Step [434/1618], Loss: 2.0216, Perplexity: 7.5505
Epoch [2/3], Step [435/1618], Loss: 2.8195, Perplexity: 16.7677
Epoch [2/3], Step [436/1618], Loss: 2.0184, Perplexity: 7.5266
Epoch [2/3], Step [437/1618], Loss: 2.0885, Perplexity: 8.0730
Epoch [2/3], Step [438/1618], Loss: 2.3858, Perplexity: 10.8674
Epoch [2/3], Step [439/1618], Loss: 2.2231, Perplexity: 9.2363
Epoch [2/3], Step [440/1618], Loss: 2.1670, Perplexity: 8.7323
Epoch [2/3], Step [441/1618], Loss: 2.6532, Perplexity: 14.1995
Epoch [2/3], Step [442/1618], Loss: 2.1128, Perplexity: 8.2715
Epoch [2/3], Step [443/1618], Loss: 2.0989, Perplexity: 8.1569
Epoch [2/3], Step [444/1618], Loss: 2.0905, Perplexity: 8.0887
Epoch [2/3], Step [445/1618], Loss: 2.0722, Perplexity: 7.9421
Epoch [2/3], Step [446/1618], Loss: 2.0735, Perplexity: 7.9527
Epoch [2/3], Step [447/1618], Loss: 2.1701, Perplexity: 8.7591
Epoch [2/3], Step [448/1618], Loss: 2.1247, Perplexity: 8.3706
Epoch [2/3], Step [449/1618], Loss: 2.0008, Perplexity: 7.3949
Epoch [2/3], Step [450/1618], Loss: 2.3624, Perplexity: 10.6168
Epoch [2/3], Step [451/1618], Loss: 2.1521, Perplexity: 8.6029
Epoch [2/3], Step [452/1618], Loss: 2.1803, Perplexity: 8.8486
Epoch [2/3], Step [453/1618], Loss: 2.1507, Perplexity: 8.5906
Epoch [2/3], Step [454/1618], Loss: 2.2515, Perplexity: 9.5015
Epoch [2/3], Step [455/1618], Loss: 2.2481, Perplexity: 9.4693
Epoch [2/3], Step [456/1618], Loss: 2.1083, Perplexity: 8.2346
Epoch [2/3], Step [457/1618], Loss: 2.0600, Perplexity: 7.8461
Epoch [2/3], Step [458/1618], Loss: 2.1130, Perplexity: 8.2727
Epoch [2/3], Step [459/1618], Loss: 2.1993, Perplexity: 9.0187
Epoch [2/3], Step [460/1618], Loss: 2.4012, Perplexity: 11.0362
Epoch [2/3], Step [461/1618], Loss: 2.0586, Perplexity: 7.8347
Epoch [2/3], Step [462/1618], Loss: 2.3229, Perplexity: 10.2050
Epoch [2/3], Step [463/1618], Loss: 2.3150, Perplexity: 10.1250
Epoch [2/3], Step [464/1618], Loss: 2.1054, Perplexity: 8.2100
Epoch [2/3], Step [465/1618], Loss: 2.1247, Perplexity: 8.3703
Epoch [2/3], Step [466/1618], Loss: 2.0845, Perplexity: 8.0409
Epoch [2/3], Step [467/1618], Loss: 2.0732, Perplexity: 7.9501
Epoch [2/3], Step [468/1618], Loss: 2.3378, Perplexity: 10.3589
Epoch [2/3], Step [469/1618], Loss: 2.1290, Perplexity: 8.4061
Epoch [2/3], Step [470/1618], Loss: 2.1601, Perplexity: 8.6720
Epoch [2/3], Step [471/1618], Loss: 2.1988, Perplexity: 9.0141
Epoch [2/3], Step [472/1618], Loss: 2.0678, Perplexity: 7.9077
Epoch [2/3], Step [473/1618], Loss: 2.1133, Perplexity: 8.2753
Epoch [2/3], Step [474/1618], Loss: 2.5679, Perplexity: 13.0390
Epoch [2/3], Step [475/1618], Loss: 2.1134, Perplexity: 8.2765
Epoch [2/3], Step [476/1618], Loss: 2.1746, Perplexity: 8.7983
Epoch [2/3], Step [477/1618], Loss: 2.1369, Perplexity: 8.4732
Epoch [2/3], Step [478/1618], Loss: 2.1291, Perplexity: 8.4072
Epoch [2/3], Step [479/1618], Loss: 2.8470, Perplexity: 17.2354
Epoch [2/3], Step [480/1618], Loss: 2.1564, Perplexity: 8.6403
Epoch [2/3], Step [481/1618], Loss: 2.2494, Perplexity: 9.4823
Epoch [2/3], Step [482/1618], Loss: 2.1529, Perplexity: 8.6100
Epoch [2/3], Step [483/1618], Loss: 2.1986, Perplexity: 9.0125
Epoch [2/3], Step [484/1618], Loss: 1.9778, Perplexity: 7.2271
Epoch [2/3], Step [485/1618], Loss: 2.0269, Perplexity: 7.5902
Epoch [2/3], Step [486/1618], Loss: 2.1246, Perplexity: 8.3693
Epoch [2/3], Step [487/1618], Loss: 1.9476, Perplexity: 7.0118
Epoch [2/3], Step [488/1618], Loss: 2.1110, Perplexity: 8.2566
Epoch [2/3], Step [489/1618], Loss: 2.1809, Perplexity: 8.8540
Epoch [2/3], Step [490/1618], Loss: 2.1756, Perplexity: 8.8072
Epoch [2/3], Step [491/1618], Loss: 2.0024, Perplexity: 7.4067
Epoch [2/3], Step [492/1618], Loss: 2.0663, Perplexity: 7.8959
Epoch [2/3], Step [493/1618], Loss: 2.2673, Perplexity: 9.6532
Epoch [2/3], Step [494/1618], Loss: 2.0325, Perplexity: 7.6330
Epoch [2/3], Step [495/1618], Loss: 2.0360, Perplexity: 7.6597
Epoch [2/3], Step [496/1618], Loss: 2.0325, Perplexity: 7.6332
Epoch [2/3], Step [497/1618], Loss: 2.0535, Perplexity: 7.7953
Epoch [2/3], Step [498/1618], Loss: 2.1744, Perplexity: 8.7973
Epoch [2/3], Step [499/1618], Loss: 2.1540, Perplexity: 8.6191
Epoch [2/3], Step [500/1618], Loss: 1.9778, Perplexity: 7.2271
Epoch [2/3], Step [501/1618], Loss: 1.9755, Perplexity: 7.2101
Epoch [2/3], Step [502/1618], Loss: 2.0042, Perplexity: 7.4198
Epoch [2/3], Step [503/1618], Loss: 2.1360, Perplexity: 8.4654
Epoch [2/3], Step [504/1618], Loss: 2.0457, Perplexity: 7.7344
Epoch [2/3], Step [505/1618], Loss: 2.0874, Perplexity: 8.0640
Epoch [2/3], Step [506/1618], Loss: 2.0631, Perplexity: 7.8700
Epoch [2/3], Step [507/1618], Loss: 2.1345, Perplexity: 8.4531
Epoch [2/3], Step [508/1618], Loss: 2.0426, Perplexity: 7.7106
Epoch [2/3], Step [509/1618], Loss: 2.0325, Perplexity: 7.6329
Epoch [2/3], Step [510/1618], Loss: 1.9850, Perplexity: 7.2787
Epoch [2/3], Step [511/1618], Loss: 2.0657, Perplexity: 7.8909
Epoch [2/3], Step [512/1618], Loss: 2.0917, Perplexity: 8.0986
Epoch [2/3], Step [513/1618], Loss: 2.1327, Perplexity: 8.4376
Epoch [2/3], Step [514/1618], Loss: 2.0764, Perplexity: 7.9756
Epoch [2/3], Step [515/1618], Loss: 2.0412, Perplexity: 7.7001
Epoch [2/3], Step [516/1618], Loss: 3.1823, Perplexity: 24.1022
Epoch [2/3], Step [517/1618], Loss: 2.0544, Perplexity: 7.8019
Epoch [2/3], Step [518/1618], Loss: 2.0952, Perplexity: 8.1274
Epoch [2/3], Step [519/1618], Loss: 2.1059, Perplexity: 8.2146
Epoch [2/3], Step [520/1618], Loss: 2.0767, Perplexity: 7.9781
Epoch [2/3], Step [521/1618], Loss: 1.9947, Perplexity: 7.3497
Epoch [2/3], Step [522/1618], Loss: 2.0277, Perplexity: 7.5967
Epoch [2/3], Step [523/1618], Loss: 2.1074, Perplexity: 8.2272
Epoch [2/3], Step [524/1618], Loss: 2.1312, Perplexity: 8.4248
Epoch [2/3], Step [525/1618], Loss: 2.0186, Perplexity: 7.5276
Epoch [2/3], Step [526/1618], Loss: 2.1003, Perplexity: 8.1686
Epoch [2/3], Step [527/1618], Loss: 2.0991, Perplexity: 8.1584
Epoch [2/3], Step [528/1618], Loss: 2.5192, Perplexity: 12.4190
Epoch [2/3], Step [529/1618], Loss: 2.0061, Perplexity: 7.4343
Epoch [2/3], Step [530/1618], Loss: 2.1021, Perplexity: 8.1836
Epoch [2/3], Step [531/1618], Loss: 2.3879, Perplexity: 10.8910
Epoch [2/3], Step [532/1618], Loss: 2.0681, Perplexity: 7.9102
Epoch [2/3], Step [533/1618], Loss: 2.4306, Perplexity: 11.3651
Epoch [2/3], Step [534/1618], Loss: 2.1243, Perplexity: 8.3674
Epoch [2/3], Step [535/1618], Loss: 2.0979, Perplexity: 8.1487
Epoch [2/3], Step [536/1618], Loss: 2.0729, Perplexity: 7.9478
Epoch [2/3], Step [537/1618], Loss: 2.0477, Perplexity: 7.7501
Epoch [2/3], Step [538/1618], Loss: 2.0810, Perplexity: 8.0123
Epoch [2/3], Step [539/1618], Loss: 2.1011, Perplexity: 8.1753
Epoch [2/3], Step [540/1618], Loss: 2.0360, Perplexity: 7.6597
Epoch [2/3], Step [541/1618], Loss: 2.1103, Perplexity: 8.2510
Epoch [2/3], Step [542/1618], Loss: 3.3417, Perplexity: 28.2682
Epoch [2/3], Step [543/1618], Loss: 2.0374, Perplexity: 7.6708
Epoch [2/3], Step [544/1618], Loss: 2.1586, Perplexity: 8.6594
Epoch [2/3], Step [545/1618], Loss: 2.0754, Perplexity: 7.9681
Epoch [2/3], Step [546/1618], Loss: 2.0832, Perplexity: 8.0300
Epoch [2/3], Step [547/1618], Loss: 2.0760, Perplexity: 7.9723
Epoch [2/3], Step [548/1618], Loss: 2.1137, Perplexity: 8.2792
Epoch [2/3], Step [549/1618], Loss: 1.9797, Perplexity: 7.2406
Epoch [2/3], Step [550/1618], Loss: 2.1053, Perplexity: 8.2098
Epoch [2/3], Step [551/1618], Loss: 2.3587, Perplexity: 10.5773
Epoch [2/3], Step [552/1618], Loss: 1.9843, Perplexity: 7.2742
Epoch [2/3], Step [553/1618], Loss: 2.0367, Perplexity: 7.6652
Epoch [2/3], Step [554/1618], Loss: 2.3963, Perplexity: 10.9827
Epoch [2/3], Step [555/1618], Loss: 2.0986, Perplexity: 8.1546
Epoch [2/3], Step [556/1618], Loss: 2.0596, Perplexity: 7.8426
Epoch [2/3], Step [557/1618], Loss: 2.4054, Perplexity: 11.0828
Epoch [2/3], Step [558/1618], Loss: 2.2532, Perplexity: 9.5186
Epoch [2/3], Step [559/1618], Loss: 2.0457, Perplexity: 7.7344
Epoch [2/3], Step [560/1618], Loss: 2.0365, Perplexity: 7.6634
Epoch [2/3], Step [561/1618], Loss: 2.1881, Perplexity: 8.9186
Epoch [2/3], Step [562/1618], Loss: 2.2340, Perplexity: 9.3367
Epoch [2/3], Step [563/1618], Loss: 2.1411, Perplexity: 8.5085
Epoch [2/3], Step [564/1618], Loss: 2.4134, Perplexity: 11.1724
Epoch [2/3], Step [565/1618], Loss: 1.9662, Perplexity: 7.1438
Epoch [2/3], Step [566/1618], Loss: 2.1320, Perplexity: 8.4320
Epoch [2/3], Step [567/1618], Loss: 1.9610, Perplexity: 7.1068
Epoch [2/3], Step [568/1618], Loss: 1.9837, Perplexity: 7.2698
Epoch [2/3], Step [569/1618], Loss: 2.1211, Perplexity: 8.3405
Epoch [2/3], Step [570/1618], Loss: 2.4312, Perplexity: 11.3723
Epoch [2/3], Step [571/1618], Loss: 2.1171, Perplexity: 8.3067
Epoch [2/3], Step [572/1618], Loss: 2.0325, Perplexity: 7.6334
Epoch [2/3], Step [573/1618], Loss: 2.0998, Perplexity: 8.1647
Epoch [2/3], Step [574/1618], Loss: 2.0989, Perplexity: 8.1570
Epoch [2/3], Step [575/1618], Loss: 2.3079, Perplexity: 10.0533
Epoch [2/3], Step [576/1618], Loss: 2.3393, Perplexity: 10.3741
Epoch [2/3], Step [577/1618], Loss: 2.1040, Perplexity: 8.1985
Epoch [2/3], Step [578/1618], Loss: 2.1382, Perplexity: 8.4843
Epoch [2/3], Step [579/1618], Loss: 2.2826, Perplexity: 9.8023
Epoch [2/3], Step [580/1618], Loss: 1.9697, Perplexity: 7.1688
Epoch [2/3], Step [581/1618], Loss: 2.0022, Perplexity: 7.4051
Epoch [2/3], Step [582/1618], Loss: 2.1682, Perplexity: 8.7423
Epoch [2/3], Step [583/1618], Loss: 2.3035, Perplexity: 10.0096
Epoch [2/3], Step [584/1618], Loss: 1.9890, Perplexity: 7.3083
Epoch [2/3], Step [585/1618], Loss: 2.0903, Perplexity: 8.0870
Epoch [2/3], Step [586/1618], Loss: 2.8879, Perplexity: 17.9556
Epoch [2/3], Step [587/1618], Loss: 1.9802, Perplexity: 7.2442
Epoch [2/3], Step [588/1618], Loss: 2.0748, Perplexity: 7.9628
Epoch [2/3], Step [589/1618], Loss: 2.0313, Perplexity: 7.6239
Epoch [2/3], Step [590/1618], Loss: 2.2397, Perplexity: 9.3908
Epoch [2/3], Step [591/1618], Loss: 2.0729, Perplexity: 7.9480
Epoch [2/3], Step [592/1618], Loss: 2.0514, Perplexity: 7.7787
Epoch [2/3], Step [593/1618], Loss: 2.1456, Perplexity: 8.5468
Epoch [2/3], Step [594/1618], Loss: 2.1640, Perplexity: 8.7059
Epoch [2/3], Step [595/1618], Loss: 2.3731, Perplexity: 10.7302
Epoch [2/3], Step [596/1618], Loss: 1.9931, Perplexity: 7.3382
Epoch [2/3], Step [597/1618], Loss: 2.1459, Perplexity: 8.5499
Epoch [2/3], Step [598/1618], Loss: 2.2863, Perplexity: 9.8383
Epoch [2/3], Step [599/1618], Loss: 2.1279, Perplexity: 8.3971
Epoch [2/3], Step [600/1618], Loss: 2.1967, Perplexity: 8.9950
Epoch [2/3], Step [601/1618], Loss: 2.0794, Perplexity: 7.9993
Epoch [2/3], Step [602/1618], Loss: 2.1322, Perplexity: 8.4337
Epoch [2/3], Step [603/1618], Loss: 2.1399, Perplexity: 8.4988
Epoch [2/3], Step [604/1618], Loss: 2.1252, Perplexity: 8.3743
Epoch [2/3], Step [605/1618], Loss: 2.1200, Perplexity: 8.3312
Epoch [2/3], Step [606/1618], Loss: 2.0768, Perplexity: 7.9787
Epoch [2/3], Step [607/1618], Loss: 2.1230, Perplexity: 8.3558
Epoch [2/3], Step [608/1618], Loss: 2.6428, Perplexity: 14.0520
Epoch [2/3], Step [609/1618], Loss: 2.0341, Perplexity: 7.6457
Epoch [2/3], Step [610/1618], Loss: 2.1165, Perplexity: 8.3023
Epoch [2/3], Step [611/1618], Loss: 1.9416, Perplexity: 6.9698
Epoch [2/3], Step [612/1618], Loss: 2.3670, Perplexity: 10.6656
Epoch [2/3], Step [613/1618], Loss: 2.1653, Perplexity: 8.7170
Epoch [2/3], Step [614/1618], Loss: 2.1211, Perplexity: 8.3407
Epoch [2/3], Step [615/1618], Loss: 2.1033, Perplexity: 8.1934
Epoch [2/3], Step [616/1618], Loss: 2.0940, Perplexity: 8.1170
Epoch [2/3], Step [617/1618], Loss: 2.0708, Perplexity: 7.9312
Epoch [2/3], Step [618/1618], Loss: 2.0522, Perplexity: 7.7847
Epoch [2/3], Step [619/1618], Loss: 2.1312, Perplexity: 8.4246
Epoch [2/3], Step [620/1618], Loss: 1.9890, Perplexity: 7.3082
Epoch [2/3], Step [621/1618], Loss: 2.3537, Perplexity: 10.5248
Epoch [2/3], Step [622/1618], Loss: 2.2532, Perplexity: 9.5183
Epoch [2/3], Step [623/1618], Loss: 2.1445, Perplexity: 8.5376
Epoch [2/3], Step [624/1618], Loss: 2.4483, Perplexity: 11.5685
Epoch [2/3], Step [625/1618], Loss: 2.0029, Perplexity: 7.4102
Epoch [2/3], Step [626/1618], Loss: 2.0789, Perplexity: 7.9955
Epoch [2/3], Step [627/1618], Loss: 2.0851, Perplexity: 8.0457
Epoch [2/3], Step [628/1618], Loss: 2.1032, Perplexity: 8.1923
Epoch [2/3], Step [629/1618], Loss: 2.1153, Perplexity: 8.2923
Epoch [2/3], Step [630/1618], Loss: 2.0576, Perplexity: 7.8269
Epoch [2/3], Step [631/1618], Loss: 2.1773, Perplexity: 8.8222
Epoch [2/3], Step [632/1618], Loss: 1.9991, Perplexity: 7.3820
Epoch [2/3], Step [633/1618], Loss: 2.2571, Perplexity: 9.5552
Epoch [2/3], Step [634/1618], Loss: 2.3190, Perplexity: 10.1660
Epoch [2/3], Step [635/1618], Loss: 2.2263, Perplexity: 9.2655
Epoch [2/3], Step [636/1618], Loss: 2.0807, Perplexity: 8.0100
Epoch [2/3], Step [637/1618], Loss: 2.1329, Perplexity: 8.4391
Epoch [2/3], Step [638/1618], Loss: 2.0978, Perplexity: 8.1486
Epoch [2/3], Step [639/1618], Loss: 2.1260, Perplexity: 8.3809
Epoch [2/3], Step [640/1618], Loss: 2.4768, Perplexity: 11.9030
Epoch [2/3], Step [641/1618], Loss: 2.1599, Perplexity: 8.6703
Epoch [2/3], Step [642/1618], Loss: 1.9558, Perplexity: 7.0699
Epoch [2/3], Step [643/1618], Loss: 2.1292, Perplexity: 8.4085
Epoch [2/3], Step [644/1618], Loss: 2.3443, Perplexity: 10.4263
Epoch [2/3], Step [645/1618], Loss: 2.0196, Perplexity: 7.5356
Epoch [2/3], Step [646/1618], Loss: 2.0134, Perplexity: 7.4887
Epoch [2/3], Step [647/1618], Loss: 2.0025, Perplexity: 7.4074
Epoch [2/3], Step [648/1618], Loss: 2.3058, Perplexity: 10.0325
Epoch [2/3], Step [649/1618], Loss: 2.5524, Perplexity: 12.8383
Epoch [2/3], Step [650/1618], Loss: 2.1060, Perplexity: 8.2150
Epoch [2/3], Step [651/1618], Loss: 2.7590, Perplexity: 15.7842
Epoch [2/3], Step [652/1618], Loss: 2.0120, Perplexity: 7.4785
Epoch [2/3], Step [653/1618], Loss: 2.0989, Perplexity: 8.1574
Epoch [2/3], Step [654/1618], Loss: 2.0931, Perplexity: 8.1098
Epoch [2/3], Step [655/1618], Loss: 2.0489, Perplexity: 7.7593
Epoch [2/3], Step [656/1618], Loss: 2.9798, Perplexity: 19.6841
Epoch [2/3], Step [657/1618], Loss: 2.0883, Perplexity: 8.0710
Epoch [2/3], Step [658/1618], Loss: 1.9710, Perplexity: 7.1781
Epoch [2/3], Step [659/1618], Loss: 2.1173, Perplexity: 8.3087
Epoch [2/3], Step [660/1618], Loss: 2.0880, Perplexity: 8.0690
Epoch [2/3], Step [661/1618], Loss: 2.0588, Perplexity: 7.8369
Epoch [2/3], Step [662/1618], Loss: 2.1899, Perplexity: 8.9340
Epoch [2/3], Step [663/1618], Loss: 2.1276, Perplexity: 8.3948
Epoch [2/3], Step [664/1618], Loss: 2.1389, Perplexity: 8.4905
Epoch [2/3], Step [665/1618], Loss: 2.0813, Perplexity: 8.0146
Epoch [2/3], Step [666/1618], Loss: 2.0844, Perplexity: 8.0398
Epoch [2/3], Step [667/1618], Loss: 2.1134, Perplexity: 8.2765
Epoch [2/3], Step [668/1618], Loss: 2.0974, Perplexity: 8.1448
Epoch [2/3], Step [669/1618], Loss: 2.0197, Perplexity: 7.5359
Epoch [2/3], Step [670/1618], Loss: 2.4247, Perplexity: 11.2993
Epoch [2/3], Step [671/1618], Loss: 2.5508, Perplexity: 12.8179
Epoch [2/3], Step [672/1618], Loss: 2.1162, Perplexity: 8.2994
Epoch [2/3], Step [673/1618], Loss: 1.9845, Perplexity: 7.2752
Epoch [2/3], Step [674/1618], Loss: 2.0647, Perplexity: 7.8829
Epoch [2/3], Step [675/1618], Loss: 2.1561, Perplexity: 8.6377
Epoch [2/3], Step [676/1618], Loss: 2.3389, Perplexity: 10.3696
Epoch [2/3], Step [677/1618], Loss: 2.4812, Perplexity: 11.9551
Epoch [2/3], Step [678/1618], Loss: 2.1548, Perplexity: 8.6266
Epoch [2/3], Step [679/1618], Loss: 2.3627, Perplexity: 10.6194
Epoch [2/3], Step [680/1618], Loss: 3.2807, Perplexity: 26.5953
Epoch [2/3], Step [681/1618], Loss: 2.0211, Perplexity: 7.5466
Epoch [2/3], Step [682/1618], Loss: 2.0411, Perplexity: 7.6991
Epoch [2/3], Step [683/1618], Loss: 2.2629, Perplexity: 9.6113
Epoch [2/3], Step [684/1618], Loss: 2.0318, Perplexity: 7.6280
Epoch [2/3], Step [685/1618], Loss: 1.9901, Perplexity: 7.3164
Epoch [2/3], Step [686/1618], Loss: 2.0515, Perplexity: 7.7793
Epoch [2/3], Step [687/1618], Loss: 2.3494, Perplexity: 10.4795
Epoch [2/3], Step [688/1618], Loss: 2.0425, Perplexity: 7.7095
Epoch [2/3], Step [689/1618], Loss: 2.1027, Perplexity: 8.1881
Epoch [2/3], Step [690/1618], Loss: 1.9776, Perplexity: 7.2256
Epoch [2/3], Step [691/1618], Loss: 2.1278, Perplexity: 8.3965
Epoch [2/3], Step [692/1618], Loss: 2.0349, Perplexity: 7.6512
Epoch [2/3], Step [693/1618], Loss: 1.9665, Perplexity: 7.1457
Epoch [2/3], Step [694/1618], Loss: 2.1217, Perplexity: 8.3450
Epoch [2/3], Step [695/1618], Loss: 2.3895, Perplexity: 10.9084
Epoch [2/3], Step [696/1618], Loss: 2.3054, Perplexity: 10.0277
Epoch [2/3], Step [697/1618], Loss: 2.0188, Perplexity: 7.5290
Epoch [2/3], Step [698/1618], Loss: 2.0925, Perplexity: 8.1053
Epoch [2/3], Step [699/1618], Loss: 2.5974, Perplexity: 13.4294
Epoch [2/3], Step [700/1618], Loss: 2.0597, Perplexity: 7.8439
Epoch [2/3], Step [701/1618], Loss: 2.3420, Perplexity: 10.4017
Epoch [2/3], Step [702/1618], Loss: 2.3211, Perplexity: 10.1871
Epoch [2/3], Step [703/1618], Loss: 2.0890, Perplexity: 8.0768
Epoch [2/3], Step [704/1618], Loss: 2.1336, Perplexity: 8.4452
Epoch [2/3], Step [705/1618], Loss: 2.0534, Perplexity: 7.7947
Epoch [2/3], Step [706/1618], Loss: 2.0666, Perplexity: 7.8979
Epoch [2/3], Step [707/1618], Loss: 2.0825, Perplexity: 8.0242
Epoch [2/3], Step [708/1618], Loss: 2.1443, Perplexity: 8.5364
Epoch [2/3], Step [709/1618], Loss: 2.0627, Perplexity: 7.8673
Epoch [2/3], Step [710/1618], Loss: 2.0893, Perplexity: 8.0795
Epoch [2/3], Step [711/1618], Loss: 2.1212, Perplexity: 8.3408
Epoch [2/3], Step [712/1618], Loss: 1.9570, Perplexity: 7.0782
Epoch [2/3], Step [713/1618], Loss: 2.3009, Perplexity: 9.9836
Epoch [2/3], Step [714/1618], Loss: 2.1293, Perplexity: 8.4086
Epoch [2/3], Step [715/1618], Loss: 2.1751, Perplexity: 8.8033
Epoch [2/3], Step [716/1618], Loss: 2.0594, Perplexity: 7.8411
Epoch [2/3], Step [717/1618], Loss: 2.4376, Perplexity: 11.4456
Epoch [2/3], Step [718/1618], Loss: 2.1710, Perplexity: 8.7674
Epoch [2/3], Step [719/1618], Loss: 2.0701, Perplexity: 7.9258
Epoch [2/3], Step [720/1618], Loss: 2.1122, Perplexity: 8.2662
Epoch [2/3], Step [721/1618], Loss: 2.0818, Perplexity: 8.0186
Epoch [2/3], Step [722/1618], Loss: 1.9994, Perplexity: 7.3846
Epoch [2/3], Step [723/1618], Loss: 2.4096, Perplexity: 11.1290
Epoch [2/3], Step [724/1618], Loss: 2.0628, Perplexity: 7.8680
Epoch [2/3], Step [725/1618], Loss: 2.3271, Perplexity: 10.2487
Epoch [2/3], Step [726/1618], Loss: 2.1676, Perplexity: 8.7373
Epoch [2/3], Step [727/1618], Loss: 2.1881, Perplexity: 8.9186
Epoch [2/3], Step [728/1618], Loss: 2.6490, Perplexity: 14.1398
Epoch [2/3], Step [729/1618], Loss: 2.2059, Perplexity: 9.0785
Epoch [2/3], Step [730/1618], Loss: 2.0177, Perplexity: 7.5209
Epoch [2/3], Step [731/1618], Loss: 2.1017, Perplexity: 8.1804
Epoch [2/3], Step [732/1618], Loss: 2.1041, Perplexity: 8.1999
Epoch [2/3], Step [733/1618], Loss: 2.0222, Perplexity: 7.5553
Epoch [2/3], Step [734/1618], Loss: 2.0692, Perplexity: 7.9184
Epoch [2/3], Step [735/1618], Loss: 2.1216, Perplexity: 8.3447
Epoch [2/3], Step [736/1618], Loss: 1.9255, Perplexity: 6.8587
Epoch [2/3], Step [737/1618], Loss: 2.0392, Perplexity: 7.6842
Epoch [2/3], Step [738/1618], Loss: 2.1601, Perplexity: 8.6720
Epoch [2/3], Step [739/1618], Loss: 2.2457, Perplexity: 9.4468
Epoch [2/3], Step [740/1618], Loss: 2.0485, Perplexity: 7.7566
Epoch [2/3], Step [741/1618], Loss: 3.1348, Perplexity: 22.9840
Epoch [2/3], Step [742/1618], Loss: 2.1985, Perplexity: 9.0118
Epoch [2/3], Step [743/1618], Loss: 2.0339, Perplexity: 7.6437
Epoch [2/3], Step [744/1618], Loss: 3.0080, Perplexity: 20.2478
Epoch [2/3], Step [745/1618], Loss: 2.0840, Perplexity: 8.0363
Epoch [2/3], Step [746/1618], Loss: 2.0689, Perplexity: 7.9157
Epoch [2/3], Step [747/1618], Loss: 1.9661, Perplexity: 7.1431
Epoch [2/3], Step [748/1618], Loss: 2.2029, Perplexity: 9.0515
Epoch [2/3], Step [749/1618], Loss: 2.1412, Perplexity: 8.5094
Epoch [2/3], Step [750/1618], Loss: 2.7750, Perplexity: 16.0390
Epoch [2/3], Step [751/1618], Loss: 2.0554, Perplexity: 7.8101
Epoch [2/3], Step [752/1618], Loss: 2.1875, Perplexity: 8.9131
Epoch [2/3], Step [753/1618], Loss: 2.0906, Perplexity: 8.0900
Epoch [2/3], Step [754/1618], Loss: 1.9824, Perplexity: 7.2602
Epoch [2/3], Step [755/1618], Loss: 2.0612, Perplexity: 7.8553
Epoch [2/3], Step [756/1618], Loss: 2.0499, Perplexity: 7.7667
Epoch [2/3], Step [757/1618], Loss: 2.4095, Perplexity: 11.1279
Epoch [2/3], Step [758/1618], Loss: 2.1256, Perplexity: 8.3778
Epoch [2/3], Step [759/1618], Loss: 2.1670, Perplexity: 8.7321
Epoch [2/3], Step [760/1618], Loss: 2.0082, Perplexity: 7.4496
Epoch [2/3], Step [761/1618], Loss: 2.0100, Perplexity: 7.4631
Epoch [2/3], Step [762/1618], Loss: 2.1155, Perplexity: 8.2936
Epoch [2/3], Step [763/1618], Loss: 2.0921, Perplexity: 8.1017
Epoch [2/3], Step [764/1618], Loss: 2.4112, Perplexity: 11.1469
Epoch [2/3], Step [765/1618], Loss: 1.9286, Perplexity: 6.8797
Epoch [2/3], Step [766/1618], Loss: 2.2795, Perplexity: 9.7721
Epoch [2/3], Step [767/1618], Loss: 2.0659, Perplexity: 7.8927
Epoch [2/3], Step [768/1618], Loss: 2.2902, Perplexity: 9.8774
Epoch [2/3], Step [769/1618], Loss: 2.0528, Perplexity: 7.7898
Epoch [2/3], Step [770/1618], Loss: 2.0121, Perplexity: 7.4789
Epoch [2/3], Step [771/1618], Loss: 3.0746, Perplexity: 21.6415
Epoch [2/3], Step [772/1618], Loss: 2.2105, Perplexity: 9.1202
Epoch [2/3], Step [773/1618], Loss: 2.0141, Perplexity: 7.4941
Epoch [2/3], Step [774/1618], Loss: 2.1017, Perplexity: 8.1797
Epoch [2/3], Step [775/1618], Loss: 2.0937, Perplexity: 8.1145
Epoch [2/3], Step [776/1618], Loss: 2.4845, Perplexity: 11.9956
Epoch [2/3], Step [777/1618], Loss: 2.1348, Perplexity: 8.4552
Epoch [2/3], Step [778/1618], Loss: 2.0091, Perplexity: 7.4565
Epoch [2/3], Step [779/1618], Loss: 2.1215, Perplexity: 8.3438
Epoch [2/3], Step [780/1618], Loss: 2.2878, Perplexity: 9.8534
Epoch [2/3], Step [781/1618], Loss: 2.0318, Perplexity: 7.6281
Epoch [2/3], Step [782/1618], Loss: 2.0155, Perplexity: 7.5042
Epoch [2/3], Step [783/1618], Loss: 2.0803, Perplexity: 8.0072
Epoch [2/3], Step [784/1618], Loss: 1.9531, Perplexity: 7.0503
Epoch [2/3], Step [785/1618], Loss: 2.0824, Perplexity: 8.0235
Epoch [2/3], Step [786/1618], Loss: 2.0332, Perplexity: 7.6387
Epoch [2/3], Step [787/1618], Loss: 2.0939, Perplexity: 8.1161
Epoch [2/3], Step [788/1618], Loss: 2.0154, Perplexity: 7.5039
Epoch [2/3], Step [789/1618], Loss: 2.0442, Perplexity: 7.7231
Epoch [2/3], Step [790/1618], Loss: 2.3827, Perplexity: 10.8344
Epoch [2/3], Step [791/1618], Loss: 2.2403, Perplexity: 9.3963
Epoch [2/3], Step [792/1618], Loss: 1.9589, Perplexity: 7.0914
Epoch [2/3], Step [793/1618], Loss: 1.9946, Perplexity: 7.3491
Epoch [2/3], Step [794/1618], Loss: 2.1147, Perplexity: 8.2871
Epoch [2/3], Step [795/1618], Loss: 2.7350, Perplexity: 15.4094
Epoch [2/3], Step [796/1618], Loss: 2.0431, Perplexity: 7.7147
Epoch [2/3], Step [797/1618], Loss: 2.0186, Perplexity: 7.5277
Epoch [2/3], Step [798/1618], Loss: 2.0113, Perplexity: 7.4734
Epoch [2/3], Step [799/1618], Loss: 2.0426, Perplexity: 7.7105
Epoch [2/3], Step [800/1618], Loss: 1.9692, Perplexity: 7.1651
Epoch [2/3], Step [801/1618], Loss: 2.2544, Perplexity: 9.5298
Epoch [2/3], Step [802/1618], Loss: 2.1845, Perplexity: 8.8860
Epoch [2/3], Step [803/1618], Loss: 2.0315, Perplexity: 7.6256
Epoch [2/3], Step [804/1618], Loss: 2.5480, Perplexity: 12.7819
Epoch [2/3], Step [805/1618], Loss: 2.0330, Perplexity: 7.6366
Epoch [2/3], Step [806/1618], Loss: 1.9861, Perplexity: 7.2870
Epoch [2/3], Step [807/1618], Loss: 2.0579, Perplexity: 7.8298
Epoch [2/3], Step [808/1618], Loss: 2.0645, Perplexity: 7.8810
Epoch [2/3], Step [809/1618], Loss: 2.1799, Perplexity: 8.8458
Epoch [2/3], Step [810/1618], Loss: 2.1209, Perplexity: 8.3387
Epoch [2/3], Step [811/1618], Loss: 2.0265, Perplexity: 7.5872
Epoch [2/3], Step [812/1618], Loss: 2.1054, Perplexity: 8.2103
Epoch [2/3], Step [813/1618], Loss: 2.0772, Perplexity: 7.9824
Epoch [2/3], Step [814/1618], Loss: 2.3546, Perplexity: 10.5337
Epoch [2/3], Step [815/1618], Loss: 1.9579, Perplexity: 7.0841
Epoch [2/3], Step [816/1618], Loss: 2.0948, Perplexity: 8.1238
Epoch [2/3], Step [817/1618], Loss: 2.7957, Perplexity: 16.3743
Epoch [2/3], Step [818/1618], Loss: 2.3292, Perplexity: 10.2696
Epoch [2/3], Step [819/1618], Loss: 2.0035, Perplexity: 7.4152
Epoch [2/3], Step [820/1618], Loss: 2.0603, Perplexity: 7.8483
Epoch [2/3], Step [821/1618], Loss: 2.1468, Perplexity: 8.5577
Epoch [2/3], Step [822/1618], Loss: 2.0615, Perplexity: 7.8576
Epoch [2/3], Step [823/1618], Loss: 2.0905, Perplexity: 8.0889
Epoch [2/3], Step [824/1618], Loss: 2.3491, Perplexity: 10.4757
Epoch [2/3], Step [825/1618], Loss: 1.9156, Perplexity: 6.7912
Epoch [2/3], Step [826/1618], Loss: 2.0972, Perplexity: 8.1436
Epoch [2/3], Step [827/1618], Loss: 2.0511, Perplexity: 7.7765
Epoch [2/3], Step [828/1618], Loss: 2.0835, Perplexity: 8.0325
Epoch [2/3], Step [829/1618], Loss: 1.9596, Perplexity: 7.0968
Epoch [2/3], Step [830/1618], Loss: 2.2499, Perplexity: 9.4870
Epoch [2/3], Step [831/1618], Loss: 2.0851, Perplexity: 8.0456
Epoch [2/3], Step [832/1618], Loss: 2.0619, Perplexity: 7.8611
Epoch [2/3], Step [833/1618], Loss: 1.9797, Perplexity: 7.2409
Epoch [2/3], Step [834/1618], Loss: 2.3539, Perplexity: 10.5266
Epoch [2/3], Step [835/1618], Loss: 2.0528, Perplexity: 7.7895
Epoch [2/3], Step [836/1618], Loss: 2.0722, Perplexity: 7.9423
Epoch [2/3], Step [837/1618], Loss: 2.0937, Perplexity: 8.1152
Epoch [2/3], Step [838/1618], Loss: 2.4368, Perplexity: 11.4363
Epoch [2/3], Step [839/1618], Loss: 1.9934, Perplexity: 7.3405
Epoch [2/3], Step [840/1618], Loss: 2.0592, Perplexity: 7.8401
Epoch [2/3], Step [841/1618], Loss: 2.2240, Perplexity: 9.2440
Epoch [2/3], Step [842/1618], Loss: 2.4101, Perplexity: 11.1353
Epoch [2/3], Step [843/1618], Loss: 2.0317, Perplexity: 7.6273
Epoch [2/3], Step [844/1618], Loss: 2.0373, Perplexity: 7.6698
Epoch [2/3], Step [845/1618], Loss: 2.1143, Perplexity: 8.2837
Epoch [2/3], Step [846/1618], Loss: 2.0371, Perplexity: 7.6686
Epoch [2/3], Step [847/1618], Loss: 2.1335, Perplexity: 8.4446
Epoch [2/3], Step [848/1618], Loss: 2.0585, Perplexity: 7.8344
Epoch [2/3], Step [849/1618], Loss: 2.0831, Perplexity: 8.0290
Epoch [2/3], Step [850/1618], Loss: 2.0697, Perplexity: 7.9222
Epoch [2/3], Step [851/1618], Loss: 2.0445, Perplexity: 7.7252
Epoch [2/3], Step [852/1618], Loss: 2.9383, Perplexity: 18.8829
Epoch [2/3], Step [853/1618], Loss: 1.9539, Perplexity: 7.0562
Epoch [2/3], Step [854/1618], Loss: 1.9855, Perplexity: 7.2824
Epoch [2/3], Step [855/1618], Loss: 1.9473, Perplexity: 7.0096
Epoch [2/3], Step [856/1618], Loss: 2.8068, Perplexity: 16.5570
Epoch [2/3], Step [857/1618], Loss: 2.1066, Perplexity: 8.2199
Epoch [2/3], Step [858/1618], Loss: 2.0678, Perplexity: 7.9076
Epoch [2/3], Step [859/1618], Loss: 2.0488, Perplexity: 7.7587
Epoch [2/3], Step [860/1618], Loss: 2.1108, Perplexity: 8.2545
Epoch [2/3], Step [861/1618], Loss: 2.0512, Perplexity: 7.7773
Epoch [2/3], Step [862/1618], Loss: 1.9711, Perplexity: 7.1782
Epoch [2/3], Step [863/1618], Loss: 2.0769, Perplexity: 7.9799
Epoch [2/3], Step [864/1618], Loss: 2.0709, Perplexity: 7.9317
Epoch [2/3], Step [865/1618], Loss: 2.0682, Perplexity: 7.9108
Epoch [2/3], Step [866/1618], Loss: 2.0337, Perplexity: 7.6426
Epoch [2/3], Step [867/1618], Loss: 1.9840, Perplexity: 7.2718
Epoch [2/3], Step [868/1618], Loss: 2.0304, Perplexity: 7.6172
Epoch [2/3], Step [869/1618], Loss: 1.9941, Perplexity: 7.3456
Epoch [2/3], Step [870/1618], Loss: 2.0703, Perplexity: 7.9271
Epoch [2/3], Step [871/1618], Loss: 2.3313, Perplexity: 10.2909
Epoch [2/3], Step [872/1618], Loss: 2.0835, Perplexity: 8.0323
Epoch [2/3], Step [873/1618], Loss: 2.2899, Perplexity: 9.8735
Epoch [2/3], Step [874/1618], Loss: 2.4065, Perplexity: 11.0953
Epoch [2/3], Step [875/1618], Loss: 2.1389, Perplexity: 8.4898
Epoch [2/3], Step [876/1618], Loss: 2.0181, Perplexity: 7.5239
Epoch [2/3], Step [877/1618], Loss: 1.9982, Perplexity: 7.3759
Epoch [2/3], Step [878/1618], Loss: 2.0197, Perplexity: 7.5364
Epoch [2/3], Step [879/1618], Loss: 2.1405, Perplexity: 8.5041
Epoch [2/3], Step [880/1618], Loss: 2.0480, Perplexity: 7.7520
Epoch [2/3], Step [881/1618], Loss: 2.6754, Perplexity: 14.5175
Epoch [2/3], Step [882/1618], Loss: 2.0538, Perplexity: 7.7976
Epoch [2/3], Step [883/1618], Loss: 2.0038, Perplexity: 7.4171
Epoch [2/3], Step [884/1618], Loss: 2.0485, Perplexity: 7.7560
Epoch [2/3], Step [885/1618], Loss: 2.1156, Perplexity: 8.2945
Epoch [2/3], Step [886/1618], Loss: 2.0080, Perplexity: 7.4484
Epoch [2/3], Step [887/1618], Loss: 2.0720, Perplexity: 7.9406
Epoch [2/3], Step [888/1618], Loss: 2.0596, Perplexity: 7.8427
Epoch [2/3], Step [889/1618], Loss: 2.0801, Perplexity: 8.0054
Epoch [2/3], Step [890/1618], Loss: 1.9699, Perplexity: 7.1698
Epoch [2/3], Step [891/1618], Loss: 2.0661, Perplexity: 7.8943
Epoch [2/3], Step [892/1618], Loss: 2.0900, Perplexity: 8.0848
Epoch [2/3], Step [893/1618], Loss: 2.3496, Perplexity: 10.4814
Epoch [2/3], Step [894/1618], Loss: 1.9976, Perplexity: 7.3711
Epoch [2/3], Step [895/1618], Loss: 2.1641, Perplexity: 8.7068
Epoch [2/3], Step [896/1618], Loss: 2.0399, Perplexity: 7.6900
Epoch [2/3], Step [897/1618], Loss: 2.0268, Perplexity: 7.5901
Epoch [2/3], Step [898/1618], Loss: 1.9235, Perplexity: 6.8451
Epoch [2/3], Step [899/1618], Loss: 2.2868, Perplexity: 9.8436
Epoch [2/3], Step [900/1618], Loss: 2.0624, Perplexity: 7.8647
Epoch [2/3], Step [901/1618], Loss: 2.0793, Perplexity: 7.9989
Epoch [2/3], Step [902/1618], Loss: 2.1278, Perplexity: 8.3965
Epoch [2/3], Step [903/1618], Loss: 2.0423, Perplexity: 7.7080
Epoch [2/3], Step [904/1618], Loss: 2.0048, Perplexity: 7.4247
Epoch [2/3], Step [905/1618], Loss: 2.0258, Perplexity: 7.5819
Epoch [2/3], Step [906/1618], Loss: 2.6340, Perplexity: 13.9288
Epoch [2/3], Step [907/1618], Loss: 2.0403, Perplexity: 7.6927
Epoch [2/3], Step [908/1618], Loss: 2.2586, Perplexity: 9.5698
Epoch [2/3], Step [909/1618], Loss: 2.0136, Perplexity: 7.4899
Epoch [2/3], Step [910/1618], Loss: 2.1035, Perplexity: 8.1948
Epoch [2/3], Step [911/1618], Loss: 4.4935, Perplexity: 89.4303
Epoch [2/3], Step [912/1618], Loss: 2.2735, Perplexity: 9.7133
Epoch [2/3], Step [913/1618], Loss: 1.9761, Perplexity: 7.2149
Epoch [2/3], Step [914/1618], Loss: 2.0375, Perplexity: 7.6710
Epoch [2/3], Step [915/1618], Loss: 2.0165, Perplexity: 7.5120
Epoch [2/3], Step [916/1618], Loss: 2.0797, Perplexity: 8.0019
Epoch [2/3], Step [917/1618], Loss: 2.0323, Perplexity: 7.6316
Epoch [2/3], Step [918/1618], Loss: 2.3155, Perplexity: 10.1304
Epoch [2/3], Step [919/1618], Loss: 1.9863, Perplexity: 7.2887
Epoch [2/3], Step [920/1618], Loss: 2.1868, Perplexity: 8.9068
Epoch [2/3], Step [921/1618], Loss: 2.2072, Perplexity: 9.0904
Epoch [2/3], Step [922/1618], Loss: 2.0125, Perplexity: 7.4822
Epoch [2/3], Step [923/1618], Loss: 2.0653, Perplexity: 7.8877
Epoch [2/3], Step [924/1618], Loss: 2.0764, Perplexity: 7.9754
Epoch [2/3], Step [925/1618], Loss: 2.0969, Perplexity: 8.1409
Epoch [2/3], Step [926/1618], Loss: 2.1245, Perplexity: 8.3685
Epoch [2/3], Step [927/1618], Loss: 1.9142, Perplexity: 6.7815
Epoch [2/3], Step [928/1618], Loss: 2.1504, Perplexity: 8.5884
Epoch [2/3], Step [929/1618], Loss: 2.0963, Perplexity: 8.1358
Epoch [2/3], Step [930/1618], Loss: 2.0311, Perplexity: 7.6224
Epoch [2/3], Step [931/1618], Loss: 2.1451, Perplexity: 8.5425
Epoch [2/3], Step [932/1618], Loss: 2.0387, Perplexity: 7.6807
Epoch [2/3], Step [933/1618], Loss: 2.0653, Perplexity: 7.8874
Epoch [2/3], Step [934/1618], Loss: 2.1065, Perplexity: 8.2198
Epoch [2/3], Step [935/1618], Loss: 1.9910, Perplexity: 7.3231
Epoch [2/3], Step [936/1618], Loss: 2.1393, Perplexity: 8.4936
Epoch [2/3], Step [937/1618], Loss: 2.0696, Perplexity: 7.9220
Epoch [2/3], Step [938/1618], Loss: 2.1305, Perplexity: 8.4188
Epoch [2/3], Step [939/1618], Loss: 1.9817, Perplexity: 7.2551
Epoch [2/3], Step [940/1618], Loss: 2.2518, Perplexity: 9.5044
Epoch [2/3], Step [941/1618], Loss: 2.1069, Perplexity: 8.2223
Epoch [2/3], Step [942/1618], Loss: 1.9635, Perplexity: 7.1244
Epoch [2/3], Step [943/1618], Loss: 2.2828, Perplexity: 9.8042
Epoch [2/3], Step [944/1618], Loss: 2.0653, Perplexity: 7.8880
Epoch [2/3], Step [945/1618], Loss: 2.0273, Perplexity: 7.5933
Epoch [2/3], Step [946/1618], Loss: 2.3217, Perplexity: 10.1929
Epoch [2/3], Step [947/1618], Loss: 2.0742, Perplexity: 7.9583
Epoch [2/3], Step [948/1618], Loss: 2.0790, Perplexity: 7.9961
Epoch [2/3], Step [949/1618], Loss: 2.0371, Perplexity: 7.6681
Epoch [2/3], Step [950/1618], Loss: 2.0041, Perplexity: 7.4196
Epoch [2/3], Step [951/1618], Loss: 2.0766, Perplexity: 7.9777
Epoch [2/3], Step [952/1618], Loss: 2.0482, Perplexity: 7.7537
Epoch [2/3], Step [953/1618], Loss: 2.3179, Perplexity: 10.1545
Epoch [2/3], Step [954/1618], Loss: 2.0425, Perplexity: 7.7101
Epoch [2/3], Step [955/1618], Loss: 2.1505, Perplexity: 8.5889
Epoch [2/3], Step [956/1618], Loss: 2.2636, Perplexity: 9.6176
Epoch [2/3], Step [957/1618], Loss: 2.6692, Perplexity: 14.4281
Epoch [2/3], Step [958/1618], Loss: 2.1064, Perplexity: 8.2183
Epoch [2/3], Step [959/1618], Loss: 2.0584, Perplexity: 7.8334
Epoch [2/3], Step [960/1618], Loss: 2.4656, Perplexity: 11.7702
Epoch [2/3], Step [961/1618], Loss: 2.0666, Perplexity: 7.8981
Epoch [2/3], Step [962/1618], Loss: 2.7279, Perplexity: 15.3003
Epoch [2/3], Step [963/1618], Loss: 2.0356, Perplexity: 7.6570
Epoch [2/3], Step [964/1618], Loss: 2.1253, Perplexity: 8.3755
Epoch [2/3], Step [965/1618], Loss: 2.0336, Perplexity: 7.6419
Epoch [2/3], Step [966/1618], Loss: 2.7235, Perplexity: 15.2337
Epoch [2/3], Step [967/1618], Loss: 2.2587, Perplexity: 9.5704
Epoch [2/3], Step [968/1618], Loss: 2.1482, Perplexity: 8.5696
Epoch [2/3], Step [969/1618], Loss: 2.1412, Perplexity: 8.5095
Epoch [2/3], Step [970/1618], Loss: 1.9861, Perplexity: 7.2871
Epoch [2/3], Step [971/1618], Loss: 1.9836, Perplexity: 7.2691
Epoch [2/3], Step [972/1618], Loss: 2.0574, Perplexity: 7.8258
Epoch [2/3], Step [973/1618], Loss: 2.1240, Perplexity: 8.3644
Epoch [2/3], Step [974/1618], Loss: 2.0306, Perplexity: 7.6188
Epoch [2/3], Step [975/1618], Loss: 2.0164, Perplexity: 7.5112
Epoch [2/3], Step [976/1618], Loss: 2.0042, Perplexity: 7.4199
Epoch [2/3], Step [977/1618], Loss: 2.2350, Perplexity: 9.3464
Epoch [2/3], Step [978/1618], Loss: 1.9385, Perplexity: 6.9484
Epoch [2/3], Step [979/1618], Loss: 2.0969, Perplexity: 8.1411
Epoch [2/3], Step [980/1618], Loss: 2.1445, Perplexity: 8.5378
Epoch [2/3], Step [981/1618], Loss: 2.0478, Perplexity: 7.7508
Epoch [2/3], Step [982/1618], Loss: 1.9694, Perplexity: 7.1665
Epoch [2/3], Step [983/1618], Loss: 2.0098, Perplexity: 7.4619
Epoch [2/3], Step [984/1618], Loss: 2.1149, Perplexity: 8.2888
Epoch [2/3], Step [985/1618], Loss: 2.0579, Perplexity: 7.8293
Epoch [2/3], Step [986/1618], Loss: 2.1742, Perplexity: 8.7953
Epoch [2/3], Step [987/1618], Loss: 1.9986, Perplexity: 7.3784
Epoch [2/3], Step [988/1618], Loss: 2.0235, Perplexity: 7.5649
Epoch [2/3], Step [989/1618], Loss: 2.1173, Perplexity: 8.3088
Epoch [2/3], Step [990/1618], Loss: 2.3044, Perplexity: 10.0186
Epoch [2/3], Step [991/1618], Loss: 2.0341, Perplexity: 7.6456
Epoch [2/3], Step [992/1618], Loss: 1.9848, Perplexity: 7.2779
Epoch [2/3], Step [993/1618], Loss: 2.0801, Perplexity: 8.0055
Epoch [2/3], Step [994/1618], Loss: 1.9376, Perplexity: 6.9417
Epoch [2/3], Step [995/1618], Loss: 2.2355, Perplexity: 9.3507
Epoch [2/3], Step [996/1618], Loss: 1.9415, Perplexity: 6.9694
Epoch [2/3], Step [997/1618], Loss: 2.0539, Perplexity: 7.7981
Epoch [2/3], Step [998/1618], Loss: 2.3397, Perplexity: 10.3784
Epoch [2/3], Step [999/1618], Loss: 2.0245, Perplexity: 7.5720
Epoch [2/3], Step [1000/1618], Loss: 2.0704, Perplexity: 7.9283
Epoch [2/3], Step [1001/1618], Loss: 2.0397, Perplexity: 7.6883
Epoch [2/3], Step [1002/1618], Loss: 2.2494, Perplexity: 9.4822
Epoch [2/3], Step [1003/1618], Loss: 2.0539, Perplexity: 7.7981
Epoch [2/3], Step [1004/1618], Loss: 2.0357, Perplexity: 7.6578
Epoch [2/3], Step [1005/1618], Loss: 2.0836, Perplexity: 8.0335
Epoch [2/3], Step [1006/1618], Loss: 2.4585, Perplexity: 11.6868
Epoch [2/3], Step [1007/1618], Loss: 2.0320, Perplexity: 7.6290
Epoch [2/3], Step [1008/1618], Loss: 2.0873, Perplexity: 8.0631
Epoch [2/3], Step [1009/1618], Loss: 2.0115, Perplexity: 7.4743
Epoch [2/3], Step [1010/1618], Loss: 2.0420, Perplexity: 7.7061
Epoch [2/3], Step [1011/1618], Loss: 2.0747, Perplexity: 7.9623
Epoch [2/3], Step [1012/1618], Loss: 2.0147, Perplexity: 7.4985
Epoch [2/3], Step [1013/1618], Loss: 2.4239, Perplexity: 11.2894
Epoch [2/3], Step [1014/1618], Loss: 1.8928, Perplexity: 6.6379
Epoch [2/3], Step [1015/1618], Loss: 1.9768, Perplexity: 7.2199
Epoch [2/3], Step [1016/1618], Loss: 2.1131, Perplexity: 8.2739
Epoch [2/3], Step [1017/1618], Loss: 1.9756, Perplexity: 7.2107
Epoch [2/3], Step [1018/1618], Loss: 1.9916, Perplexity: 7.3275
Epoch [2/3], Step [1019/1618], Loss: 2.1077, Perplexity: 8.2294
Epoch [2/3], Step [1020/1618], Loss: 2.0239, Perplexity: 7.5676
Epoch [2/3], Step [1021/1618], Loss: 2.1004, Perplexity: 8.1694
Epoch [2/3], Step [1022/1618], Loss: 2.0194, Perplexity: 7.5341
Epoch [2/3], Step [1023/1618], Loss: 2.2870, Perplexity: 9.8453
Epoch [2/3], Step [1024/1618], Loss: 2.1409, Perplexity: 8.5070
Epoch [2/3], Step [1025/1618], Loss: 2.0091, Perplexity: 7.4567
Epoch [2/3], Step [1026/1618], Loss: 2.1700, Perplexity: 8.7583
Epoch [2/3], Step [1027/1618], Loss: 2.0463, Perplexity: 7.7392
Epoch [2/3], Step [1028/1618], Loss: 2.0930, Perplexity: 8.1092
Epoch [2/3], Step [1029/1618], Loss: 2.0619, Perplexity: 7.8612
Epoch [2/3], Step [1030/1618], Loss: 2.7359, Perplexity: 15.4236
Epoch [2/3], Step [1031/1618], Loss: 2.0297, Perplexity: 7.6117
Epoch [2/3], Step [1032/1618], Loss: 2.2382, Perplexity: 9.3761
Epoch [2/3], Step [1033/1618], Loss: 1.9015, Perplexity: 6.6956
Epoch [2/3], Step [1034/1618], Loss: 2.0800, Perplexity: 8.0043
Epoch [2/3], Step [1035/1618], Loss: 2.0706, Perplexity: 7.9296
Epoch [2/3], Step [1036/1618], Loss: 2.1314, Perplexity: 8.4265
Epoch [2/3], Step [1037/1618], Loss: 2.1307, Perplexity: 8.4208
Epoch [2/3], Step [1038/1618], Loss: 2.0956, Perplexity: 8.1304
Epoch [2/3], Step [1039/1618], Loss: 2.0130, Perplexity: 7.4860
Epoch [2/3], Step [1040/1618], Loss: 2.0173, Perplexity: 7.5182
Epoch [2/3], Step [1041/1618], Loss: 1.9021, Perplexity: 6.6998
Epoch [2/3], Step [1042/1618], Loss: 2.3567, Perplexity: 10.5562
Epoch [2/3], Step [1043/1618], Loss: 2.5887, Perplexity: 13.3127
Epoch [2/3], Step [1044/1618], Loss: 2.0098, Perplexity: 7.4617
Epoch [2/3], Step [1045/1618], Loss: 2.2502, Perplexity: 9.4898
Epoch [2/3], Step [1046/1618], Loss: 2.0716, Perplexity: 7.9377
Epoch [2/3], Step [1047/1618], Loss: 2.1135, Perplexity: 8.2775
Epoch [2/3], Step [1048/1618], Loss: 2.0641, Perplexity: 7.8783
Epoch [2/3], Step [1049/1618], Loss: 2.2166, Perplexity: 9.1764
Epoch [2/3], Step [1050/1618], Loss: 2.1796, Perplexity: 8.8431
Epoch [2/3], Step [1051/1618], Loss: 2.0451, Perplexity: 7.7296
Epoch [2/3], Step [1052/1618], Loss: 1.9732, Perplexity: 7.1933
Epoch [2/3], Step [1053/1618], Loss: 2.0470, Perplexity: 7.7446
Epoch [2/3], Step [1054/1618], Loss: 2.3070, Perplexity: 10.0442
Epoch [2/3], Step [1055/1618], Loss: 2.0237, Perplexity: 7.5660
Epoch [2/3], Step [1056/1618], Loss: 1.9729, Perplexity: 7.1918
Epoch [2/3], Step [1057/1618], Loss: 1.9685, Perplexity: 7.1599
Epoch [2/3], Step [1058/1618], Loss: 2.0062, Perplexity: 7.4351
Epoch [2/3], Step [1059/1618], Loss: 2.0799, Perplexity: 8.0037
Epoch [2/3], Step [1060/1618], Loss: 2.3160, Perplexity: 10.1346
Epoch [2/3], Step [1061/1618], Loss: 1.9929, Perplexity: 7.3366
Epoch [2/3], Step [1062/1618], Loss: 2.1930, Perplexity: 8.9618
Epoch [2/3], Step [1063/1618], Loss: 2.0781, Perplexity: 7.9892
Epoch [2/3], Step [1064/1618], Loss: 2.1559, Perplexity: 8.6360
Epoch [2/3], Step [1065/1618], Loss: 2.0466, Perplexity: 7.7415
Epoch [2/3], Step [1066/1618], Loss: 2.6937, Perplexity: 14.7859
Epoch [2/3], Step [1067/1618], Loss: 1.9601, Perplexity: 7.1001
Epoch [2/3], Step [1068/1618], Loss: 2.4227, Perplexity: 11.2757
Epoch [2/3], Step [1069/1618], Loss: 2.0411, Perplexity: 7.6992
Epoch [2/3], Step [1070/1618], Loss: 2.3189, Perplexity: 10.1640
Epoch [2/3], Step [1071/1618], Loss: 2.0004, Perplexity: 7.3921
Epoch [2/3], Step [1072/1618], Loss: 1.9972, Perplexity: 7.3684
Epoch [2/3], Step [1073/1618], Loss: 2.0950, Perplexity: 8.1251
Epoch [2/3], Step [1074/1618], Loss: 1.9473, Perplexity: 7.0098
Epoch [2/3], Step [1075/1618], Loss: 2.8589, Perplexity: 17.4419
Epoch [2/3], Step [1076/1618], Loss: 2.1230, Perplexity: 8.3564
Epoch [2/3], Step [1077/1618], Loss: 2.0042, Perplexity: 7.4201
Epoch [2/3], Step [1078/1618], Loss: 2.0285, Perplexity: 7.6029
Epoch [2/3], Step [1079/1618], Loss: 2.0462, Perplexity: 7.7384
Epoch [2/3], Step [1080/1618], Loss: 1.9585, Perplexity: 7.0887
Epoch [2/3], Step [1081/1618], Loss: 2.2596, Perplexity: 9.5795
Epoch [2/3], Step [1082/1618], Loss: 2.1756, Perplexity: 8.8072
Epoch [2/3], Step [1083/1618], Loss: 2.1047, Perplexity: 8.2045
Epoch [2/3], Step [1084/1618], Loss: 2.0236, Perplexity: 7.5655
Epoch [2/3], Step [1085/1618], Loss: 1.9989, Perplexity: 7.3807
Epoch [2/3], Step [1086/1618], Loss: 2.0516, Perplexity: 7.7803
Epoch [2/3], Step [1087/1618], Loss: 1.9570, Perplexity: 7.0779
Epoch [2/3], Step [1088/1618], Loss: 2.1512, Perplexity: 8.5952
Epoch [2/3], Step [1089/1618], Loss: 2.1018, Perplexity: 8.1808
Epoch [2/3], Step [1090/1618], Loss: 1.9613, Perplexity: 7.1083
Epoch [2/3], Step [1091/1618], Loss: 2.0255, Perplexity: 7.5801
Epoch [2/3], Step [1092/1618], Loss: 2.2930, Perplexity: 9.9044
Epoch [2/3], Step [1093/1618], Loss: 2.1479, Perplexity: 8.5666
Epoch [2/3], Step [1094/1618], Loss: 1.9899, Perplexity: 7.3150
Epoch [2/3], Step [1095/1618], Loss: 2.4341, Perplexity: 11.4061
Epoch [2/3], Step [1096/1618], Loss: 2.0407, Perplexity: 7.6961
Epoch [2/3], Step [1097/1618], Loss: 2.0323, Perplexity: 7.6315
Epoch [2/3], Step [1098/1618], Loss: 2.0456, Perplexity: 7.7341
Epoch [2/3], Step [1099/1618], Loss: 2.0447, Perplexity: 7.7270
Epoch [2/3], Step [1100/1618], Loss: 2.0535, Perplexity: 7.7955
Epoch [2/3], Step [1101/1618], Loss: 2.0157, Perplexity: 7.5059
Epoch [2/3], Step [1102/1618], Loss: 2.0654, Perplexity: 7.8882
Epoch [2/3], Step [1103/1618], Loss: 1.8768, Perplexity: 6.5327
Epoch [2/3], Step [1104/1618], Loss: 2.3065, Perplexity: 10.0394
Epoch [2/3], Step [1105/1618], Loss: 1.9606, Perplexity: 7.1038
Epoch [2/3], Step [1106/1618], Loss: 1.9285, Perplexity: 6.8793
Epoch [2/3], Step [1107/1618], Loss: 2.0432, Perplexity: 7.7156
Epoch [2/3], Step [1108/1618], Loss: 2.0032, Perplexity: 7.4129
Epoch [2/3], Step [1109/1618], Loss: 2.2944, Perplexity: 9.9188
Epoch [2/3], Step [1110/1618], Loss: 2.1107, Perplexity: 8.2537
Epoch [2/3], Step [1111/1618], Loss: 1.9296, Perplexity: 6.8866
Epoch [2/3], Step [1112/1618], Loss: 1.9577, Perplexity: 7.0828
Epoch [2/3], Step [1113/1618], Loss: 2.0224, Perplexity: 7.5567
Epoch [2/3], Step [1114/1618], Loss: 2.2067, Perplexity: 9.0856
Epoch [2/3], Step [1115/1618], Loss: 1.8837, Perplexity: 6.5781
Epoch [2/3], Step [1116/1618], Loss: 2.7072, Perplexity: 14.9874
Epoch [2/3], Step [1117/1618], Loss: 2.0605, Perplexity: 7.8502
Epoch [2/3], Step [1118/1618], Loss: 1.9344, Perplexity: 6.9196
Epoch [2/3], Step [1119/1618], Loss: 2.1608, Perplexity: 8.6783
Epoch [2/3], Step [1120/1618], Loss: 2.7903, Perplexity: 16.2853
Epoch [2/3], Step [1121/1618], Loss: 2.0549, Perplexity: 7.8057
Epoch [2/3], Step [1122/1618], Loss: 2.6778, Perplexity: 14.5526
Epoch [2/3], Step [1123/1618], Loss: 2.2615, Perplexity: 9.5978
Epoch [2/3], Step [1124/1618], Loss: 2.0625, Perplexity: 7.8656
Epoch [2/3], Step [1125/1618], Loss: 1.9651, Perplexity: 7.1357
Epoch [2/3], Step [1126/1618], Loss: 2.0794, Perplexity: 7.9996
Epoch [2/3], Step [1127/1618], Loss: 1.9876, Perplexity: 7.2982
Epoch [2/3], Step [1128/1618], Loss: 2.1043, Perplexity: 8.2013
Epoch [2/3], Step [1129/1618], Loss: 1.9505, Perplexity: 7.0319
Epoch [2/3], Step [1130/1618], Loss: 2.3707, Perplexity: 10.7047
Epoch [2/3], Step [1131/1618], Loss: 2.3797, Perplexity: 10.8020
Epoch [2/3], Step [1132/1618], Loss: 2.0161, Perplexity: 7.5091
Epoch [2/3], Step [1133/1618], Loss: 2.0657, Perplexity: 7.8907
Epoch [2/3], Step [1134/1618], Loss: 2.0464, Perplexity: 7.7400
Epoch [2/3], Step [1135/1618], Loss: 2.2832, Perplexity: 9.8083
Epoch [2/3], Step [1136/1618], Loss: 1.9670, Perplexity: 7.1489
Epoch [2/3], Step [1137/1618], Loss: 4.0982, Perplexity: 60.2292
Epoch [2/3], Step [1138/1618], Loss: 2.3474, Perplexity: 10.4581
Epoch [2/3], Step [1139/1618], Loss: 2.2503, Perplexity: 9.4902
Epoch [2/3], Step [1140/1618], Loss: 1.9903, Perplexity: 7.3180
Epoch [2/3], Step [1141/1618], Loss: 2.0626, Perplexity: 7.8662
Epoch [2/3], Step [1142/1618], Loss: 2.0225, Perplexity: 7.5572
Epoch [2/3], Step [1143/1618], Loss: 2.0837, Perplexity: 8.0345
Epoch [2/3], Step [1144/1618], Loss: 2.4671, Perplexity: 11.7877
Epoch [2/3], Step [1145/1618], Loss: 2.0603, Perplexity: 7.8481
Epoch [2/3], Step [1146/1618], Loss: 1.9370, Perplexity: 6.9382
Epoch [2/3], Step [1147/1618], Loss: 2.0996, Perplexity: 8.1627
Epoch [2/3], Step [1148/1618], Loss: 1.9666, Perplexity: 7.1460
Epoch [2/3], Step [1149/1618], Loss: 2.2615, Perplexity: 9.5973
Epoch [2/3], Step [1150/1618], Loss: 2.2799, Perplexity: 9.7759
Epoch [2/3], Step [1151/1618], Loss: 2.0335, Perplexity: 7.6406
Epoch [2/3], Step [1152/1618], Loss: 2.2685, Perplexity: 9.6650
Epoch [2/3], Step [1153/1618], Loss: 1.9611, Perplexity: 7.1073
Epoch [2/3], Step [1154/1618], Loss: 2.0080, Perplexity: 7.4481
Epoch [2/3], Step [1155/1618], Loss: 2.0883, Perplexity: 8.0711
Epoch [2/3], Step [1156/1618], Loss: 2.0275, Perplexity: 7.5949
Epoch [2/3], Step [1157/1618], Loss: 2.0112, Perplexity: 7.4721
Epoch [2/3], Step [1158/1618], Loss: 2.1195, Perplexity: 8.3271
Epoch [2/3], Step [1159/1618], Loss: 2.0946, Perplexity: 8.1222
Epoch [2/3], Step [1160/1618], Loss: 2.0304, Perplexity: 7.6175
Epoch [2/3], Step [1161/1618], Loss: 2.1145, Perplexity: 8.2853
Epoch [2/3], Step [1162/1618], Loss: 2.0951, Perplexity: 8.1259
Epoch [2/3], Step [1163/1618], Loss: 2.0101, Perplexity: 7.4637
Epoch [2/3], Step [1164/1618], Loss: 2.5849, Perplexity: 13.2619
Epoch [2/3], Step [1165/1618], Loss: 1.9473, Perplexity: 7.0097
Epoch [2/3], Step [1166/1618], Loss: 1.9768, Perplexity: 7.2195
Epoch [2/3], Step [1167/1618], Loss: 1.9558, Perplexity: 7.0692
Epoch [2/3], Step [1168/1618], Loss: 1.9859, Perplexity: 7.2855
Epoch [2/3], Step [1169/1618], Loss: 2.1980, Perplexity: 9.0066
Epoch [2/3], Step [1170/1618], Loss: 2.0103, Perplexity: 7.4658
Epoch [2/3], Step [1171/1618], Loss: 2.0875, Perplexity: 8.0648
Epoch [2/3], Step [1172/1618], Loss: 1.9672, Perplexity: 7.1506
Epoch [2/3], Step [1173/1618], Loss: 2.0629, Perplexity: 7.8687
Epoch [2/3], Step [1174/1618], Loss: 2.3785, Perplexity: 10.7883
Epoch [2/3], Step [1175/1618], Loss: 1.9433, Perplexity: 6.9818
Epoch [2/3], Step [1176/1618], Loss: 2.4345, Perplexity: 11.4106
Epoch [2/3], Step [1177/1618], Loss: 2.0572, Perplexity: 7.8243
Epoch [2/3], Step [1178/1618], Loss: 1.9788, Perplexity: 7.2343
Epoch [2/3], Step [1179/1618], Loss: 2.0308, Perplexity: 7.6201
Epoch [2/3], Step [1180/1618], Loss: 2.2239, Perplexity: 9.2436
Epoch [2/3], Step [1181/1618], Loss: 2.0476, Perplexity: 7.7495
Epoch [2/3], Step [1182/1618], Loss: 2.1991, Perplexity: 9.0169
Epoch [2/3], Step [1183/1618], Loss: 1.9962, Perplexity: 7.3608
Epoch [2/3], Step [1184/1618], Loss: 2.0504, Perplexity: 7.7708
Epoch [2/3], Step [1185/1618], Loss: 2.1659, Perplexity: 8.7228
Epoch [2/3], Step [1186/1618], Loss: 1.9265, Perplexity: 6.8657
Epoch [2/3], Step [1187/1618], Loss: 1.9428, Perplexity: 6.9780
Epoch [2/3], Step [1188/1618], Loss: 2.4467, Perplexity: 11.5500
Epoch [2/3], Step [1189/1618], Loss: 2.2368, Perplexity: 9.3630
Epoch [2/3], Step [1190/1618], Loss: 2.2614, Perplexity: 9.5965
Epoch [2/3], Step [1191/1618], Loss: 2.3853, Perplexity: 10.8621
Epoch [2/3], Step [1192/1618], Loss: 2.1874, Perplexity: 8.9124
Epoch [2/3], Step [1193/1618], Loss: 2.0118, Perplexity: 7.4768
Epoch [2/3], Step [1194/1618], Loss: 1.9372, Perplexity: 6.9391
Epoch [2/3], Step [1195/1618], Loss: 2.1011, Perplexity: 8.1749
Epoch [2/3], Step [1196/1618], Loss: 2.3161, Perplexity: 10.1359
Epoch [2/3], Step [1197/1618], Loss: 2.0113, Perplexity: 7.4734
Epoch [2/3], Step [1198/1618], Loss: 2.1144, Perplexity: 8.2849
Epoch [2/3], Step [1199/1618], Loss: 2.1169, Perplexity: 8.3056
Epoch [2/3], Step [1200/1618], Loss: 1.9363, Perplexity: 6.9330
Epoch [2/3], Step [1201/1618], Loss: 2.2421, Perplexity: 9.4135
Epoch [2/3], Step [1202/1618], Loss: 2.0295, Perplexity: 7.6106
Epoch [2/3], Step [1203/1618], Loss: 2.0590, Perplexity: 7.8382
Epoch [2/3], Step [1204/1618], Loss: 2.0490, Perplexity: 7.7603
Epoch [2/3], Step [1205/1618], Loss: 1.9289, Perplexity: 6.8818
Epoch [2/3], Step [1206/1618], Loss: 1.9376, Perplexity: 6.9420
Epoch [2/3], Step [1207/1618], Loss: 2.0528, Perplexity: 7.7896
Epoch [2/3], Step [1208/1618], Loss: 2.2192, Perplexity: 9.2003
Epoch [2/3], Step [1209/1618], Loss: 2.0282, Perplexity: 7.6004
Epoch [2/3], Step [1210/1618], Loss: 2.0194, Perplexity: 7.5335
Epoch [2/3], Step [1211/1618], Loss: 2.0944, Perplexity: 8.1206
Epoch [2/3], Step [1212/1618], Loss: 1.8670, Perplexity: 6.4691
Epoch [2/3], Step [1213/1618], Loss: 1.9964, Perplexity: 7.3628
Epoch [2/3], Step [1214/1618], Loss: 1.9852, Perplexity: 7.2807
Epoch [2/3], Step [1215/1618], Loss: 2.7340, Perplexity: 15.3938
Epoch [2/3], Step [1216/1618], Loss: 2.5295, Perplexity: 12.5469
Epoch [2/3], Step [1217/1618], Loss: 2.0672, Perplexity: 7.9028
Epoch [2/3], Step [1218/1618], Loss: 2.0241, Perplexity: 7.5693
Epoch [2/3], Step [1219/1618], Loss: 2.0595, Perplexity: 7.8424
Epoch [2/3], Step [1220/1618], Loss: 1.9864, Perplexity: 7.2895
Epoch [2/3], Step [1221/1618], Loss: 2.7138, Perplexity: 15.0864
Epoch [2/3], Step [1222/1618], Loss: 1.9594, Perplexity: 7.0951
Epoch [2/3], Step [1223/1618], Loss: 2.0473, Perplexity: 7.7468
Epoch [2/3], Step [1224/1618], Loss: 2.1031, Perplexity: 8.1918
Epoch [2/3], Step [1225/1618], Loss: 2.0112, Perplexity: 7.4726
Epoch [2/3], Step [1226/1618], Loss: 2.0574, Perplexity: 7.8259
Epoch [2/3], Step [1227/1618], Loss: 2.0813, Perplexity: 8.0146
Epoch [2/3], Step [1228/1618], Loss: 1.9334, Perplexity: 6.9132
Epoch [2/3], Step [1229/1618], Loss: 1.9973, Perplexity: 7.3691
Epoch [2/3], Step [1230/1618], Loss: 2.0437, Perplexity: 7.7189
Epoch [2/3], Step [1231/1618], Loss: 2.1269, Perplexity: 8.3886
Epoch [2/3], Step [1232/1618], Loss: 1.9142, Perplexity: 6.7815
Epoch [2/3], Step [1233/1618], Loss: 1.9047, Perplexity: 6.7171
Epoch [2/3], Step [1234/1618], Loss: 2.1297, Perplexity: 8.4120
Epoch [2/3], Step [1235/1618], Loss: 2.0354, Perplexity: 7.6555
Epoch [2/3], Step [1236/1618], Loss: 2.0208, Perplexity: 7.5445
Epoch [2/3], Step [1237/1618], Loss: 1.9815, Perplexity: 7.2535
Epoch [2/3], Step [1238/1618], Loss: 2.1300, Perplexity: 8.4147
Epoch [2/3], Step [1239/1618], Loss: 1.8838, Perplexity: 6.5788
Epoch [2/3], Step [1240/1618], Loss: 2.1519, Perplexity: 8.6014
Epoch [2/3], Step [1241/1618], Loss: 1.9893, Perplexity: 7.3102
Epoch [2/3], Step [1242/1618], Loss: 2.0741, Perplexity: 7.9573
Epoch [2/3], Step [1243/1618], Loss: 1.9746, Perplexity: 7.2034
Epoch [2/3], Step [1244/1618], Loss: 2.1676, Perplexity: 8.7372
Epoch [2/3], Step [1245/1618], Loss: 1.9827, Perplexity: 7.2624
Epoch [2/3], Step [1246/1618], Loss: 2.0077, Perplexity: 7.4461
Epoch [2/3], Step [1247/1618], Loss: 1.9960, Perplexity: 7.3598
Epoch [2/3], Step [1248/1618], Loss: 1.8905, Perplexity: 6.6229
Epoch [2/3], Step [1249/1618], Loss: 1.8839, Perplexity: 6.5790
Epoch [2/3], Step [1250/1618], Loss: 2.0072, Perplexity: 7.4427
Epoch [2/3], Step [1251/1618], Loss: 2.0522, Perplexity: 7.7849
Epoch [2/3], Step [1252/1618], Loss: 2.0795, Perplexity: 8.0002
Epoch [2/3], Step [1253/1618], Loss: 2.0506, Perplexity: 7.7727
Epoch [2/3], Step [1254/1618], Loss: 2.0636, Perplexity: 7.8743
Epoch [2/3], Step [1255/1618], Loss: 2.3730, Perplexity: 10.7294
Epoch [2/3], Step [1256/1618], Loss: 1.9764, Perplexity: 7.2169
Epoch [2/3], Step [1257/1618], Loss: 2.0503, Perplexity: 7.7700
Epoch [2/3], Step [1258/1618], Loss: 2.0456, Perplexity: 7.7339
Epoch [2/3], Step [1259/1618], Loss: 2.0601, Perplexity: 7.8470
Epoch [2/3], Step [1260/1618], Loss: 2.0090, Perplexity: 7.4558
Epoch [2/3], Step [1261/1618], Loss: 1.9578, Perplexity: 7.0839
Epoch [2/3], Step [1262/1618], Loss: 2.0073, Perplexity: 7.4432
Epoch [2/3], Step [1263/1618], Loss: 1.9978, Perplexity: 7.3725
Epoch [2/3], Step [1264/1618], Loss: 2.1927, Perplexity: 8.9596
Epoch [2/3], Step [1265/1618], Loss: 1.9863, Perplexity: 7.2882
Epoch [2/3], Step [1266/1618], Loss: 2.0493, Perplexity: 7.7624
Epoch [2/3], Step [1267/1618], Loss: 2.1111, Perplexity: 8.2571
Epoch [2/3], Step [1268/1618], Loss: 2.3938, Perplexity: 10.9551
Epoch [2/3], Step [1269/1618], Loss: 1.9679, Perplexity: 7.1556
Epoch [2/3], Step [1270/1618], Loss: 1.9567, Perplexity: 7.0757
Epoch [2/3], Step [1271/1618], Loss: 1.9898, Perplexity: 7.3141
Epoch [2/3], Step [1272/1618], Loss: 2.0026, Perplexity: 7.4084
Epoch [2/3], Step [1273/1618], Loss: 1.9820, Perplexity: 7.2570
Epoch [2/3], Step [1274/1618], Loss: 2.5218, Perplexity: 12.4504
Epoch [2/3], Step [1275/1618], Loss: 2.3236, Perplexity: 10.2121
Epoch [2/3], Step [1276/1618], Loss: 2.1809, Perplexity: 8.8542
Epoch [2/3], Step [1277/1618], Loss: 1.8797, Perplexity: 6.5515
Epoch [2/3], Step [1278/1618], Loss: 2.0033, Perplexity: 7.4134
Epoch [2/3], Step [1279/1618], Loss: 2.8494, Perplexity: 17.2772
Epoch [2/3], Step [1280/1618], Loss: 1.8479, Perplexity: 6.3464
Epoch [2/3], Step [1281/1618], Loss: 2.5111, Perplexity: 12.3184
Epoch [2/3], Step [1282/1618], Loss: 2.0494, Perplexity: 7.7631
Epoch [2/3], Step [1283/1618], Loss: 2.3737, Perplexity: 10.7375
Epoch [2/3], Step [1284/1618], Loss: 2.0349, Perplexity: 7.6514
Epoch [2/3], Step [1285/1618], Loss: 2.1848, Perplexity: 8.8890
Epoch [2/3], Step [1286/1618], Loss: 1.8871, Perplexity: 6.5999
Epoch [2/3], Step [1287/1618], Loss: 1.9500, Perplexity: 7.0284
Epoch [2/3], Step [1288/1618], Loss: 1.9246, Perplexity: 6.8526
Epoch [2/3], Step [1289/1618], Loss: 2.0162, Perplexity: 7.5097
Epoch [2/3], Step [1290/1618], Loss: 2.0539, Perplexity: 7.7981
Epoch [2/3], Step [1291/1618], Loss: 1.9909, Perplexity: 7.3218
Epoch [2/3], Step [1292/1618], Loss: 2.1903, Perplexity: 8.9378
Epoch [2/3], Step [1293/1618], Loss: 2.0442, Perplexity: 7.7230
Epoch [2/3], Step [1294/1618], Loss: 1.9920, Perplexity: 7.3302
Epoch [2/3], Step [1295/1618], Loss: 2.0028, Perplexity: 7.4098
Epoch [2/3], Step [1296/1618], Loss: 1.9517, Perplexity: 7.0407
Epoch [2/3], Step [1297/1618], Loss: 1.9994, Perplexity: 7.3848
Epoch [2/3], Step [1298/1618], Loss: 1.9974, Perplexity: 7.3697
Epoch [2/3], Step [1299/1618], Loss: 2.5742, Perplexity: 13.1207
Epoch [2/3], Step [1300/1618], Loss: 2.6161, Perplexity: 13.6818
Epoch [2/3], Step [1301/1618], Loss: 2.0367, Perplexity: 7.6651
Epoch [2/3], Step [1302/1618], Loss: 2.0002, Perplexity: 7.3907
Epoch [2/3], Step [1303/1618], Loss: 2.1563, Perplexity: 8.6393
Epoch [2/3], Step [1304/1618], Loss: 2.3345, Perplexity: 10.3242
Epoch [2/3], Step [1305/1618], Loss: 2.3021, Perplexity: 9.9955
Epoch [2/3], Step [1306/1618], Loss: 2.0051, Perplexity: 7.4265
Epoch [2/3], Step [1307/1618], Loss: 1.9977, Perplexity: 7.3724
Epoch [2/3], Step [1308/1618], Loss: 1.9983, Perplexity: 7.3764
Epoch [2/3], Step [1309/1618], Loss: 1.9893, Perplexity: 7.3103
Epoch [2/3], Step [1310/1618], Loss: 1.9704, Perplexity: 7.1735
Epoch [2/3], Step [1311/1618], Loss: 2.0113, Perplexity: 7.4728
Epoch [2/3], Step [1312/1618], Loss: 1.9602, Perplexity: 7.1010
Epoch [2/3], Step [1313/1618], Loss: 2.1096, Perplexity: 8.2451
Epoch [2/3], Step [1314/1618], Loss: 2.1606, Perplexity: 8.6761
Epoch [2/3], Step [1315/1618], Loss: 1.9360, Perplexity: 6.9311
Epoch [2/3], Step [1316/1618], Loss: 2.0284, Perplexity: 7.6022
Epoch [2/3], Step [1317/1618], Loss: 1.9784, Perplexity: 7.2312
Epoch [2/3], Step [1318/1618], Loss: 2.3318, Perplexity: 10.2965
Epoch [2/3], Step [1319/1618], Loss: 2.5060, Perplexity: 12.2558
Epoch [2/3], Step [1320/1618], Loss: 2.0473, Perplexity: 7.7473
Epoch [2/3], Step [1321/1618], Loss: 1.8588, Perplexity: 6.4160
Epoch [2/3], Step [1322/1618], Loss: 2.1634, Perplexity: 8.7009
Epoch [2/3], Step [1323/1618], Loss: 2.3898, Perplexity: 10.9108
Epoch [2/3], Step [1324/1618], Loss: 2.0034, Perplexity: 7.4141
Epoch [2/3], Step [1325/1618], Loss: 1.9545, Perplexity: 7.0604
Epoch [2/3], Step [1326/1618], Loss: 2.4288, Perplexity: 11.3458
Epoch [2/3], Step [1327/1618], Loss: 1.9481, Perplexity: 7.0152
Epoch [2/3], Step [1328/1618], Loss: 1.9732, Perplexity: 7.1939
Epoch [2/3], Step [1329/1618], Loss: 2.5522, Perplexity: 12.8349
Epoch [2/3], Step [1330/1618], Loss: 2.0539, Perplexity: 7.7983
Epoch [2/3], Step [1331/1618], Loss: 2.0445, Perplexity: 7.7252
Epoch [2/3], Step [1332/1618], Loss: 2.0067, Perplexity: 7.4391
Epoch [2/3], Step [1333/1618], Loss: 1.9465, Perplexity: 7.0039
Epoch [2/3], Step [1334/1618], Loss: 2.0927, Perplexity: 8.1069
Epoch [2/3], Step [1335/1618], Loss: 2.4454, Perplexity: 11.5349
Epoch [2/3], Step [1336/1618], Loss: 2.0202, Perplexity: 7.5396
Epoch [2/3], Step [1337/1618], Loss: 2.0095, Perplexity: 7.4596
Epoch [2/3], Step [1338/1618], Loss: 2.0994, Perplexity: 8.1614
Epoch [2/3], Step [1339/1618], Loss: 1.9831, Perplexity: 7.2649
Epoch [2/3], Step [1340/1618], Loss: 2.0575, Perplexity: 7.8264
Epoch [2/3], Step [1341/1618], Loss: 2.0033, Perplexity: 7.4138
Epoch [2/3], Step [1342/1618], Loss: 2.0343, Perplexity: 7.6468
Epoch [2/3], Step [1343/1618], Loss: 2.2373, Perplexity: 9.3676
Epoch [2/3], Step [1344/1618], Loss: 1.9322, Perplexity: 6.9049
Epoch [2/3], Step [1345/1618], Loss: 1.9057, Perplexity: 6.7239
Epoch [2/3], Step [1346/1618], Loss: 2.0846, Perplexity: 8.0411
Epoch [2/3], Step [1347/1618], Loss: 1.9675, Perplexity: 7.1527
Epoch [2/3], Step [1348/1618], Loss: 2.2338, Perplexity: 9.3357
Epoch [2/3], Step [1349/1618], Loss: 1.8632, Perplexity: 6.4443
Epoch [2/3], Step [1350/1618], Loss: 2.0148, Perplexity: 7.4994
Epoch [2/3], Step [1351/1618], Loss: 2.0986, Perplexity: 8.1548
Epoch [2/3], Step [1352/1618], Loss: 2.0308, Perplexity: 7.6202
Epoch [2/3], Step [1353/1618], Loss: 1.9752, Perplexity: 7.2081
Epoch [2/3], Step [1354/1618], Loss: 1.9951, Perplexity: 7.3526
Epoch [2/3], Step [1355/1618], Loss: 1.9435, Perplexity: 6.9830
Epoch [2/3], Step [1356/1618], Loss: 2.0248, Perplexity: 7.5743
Epoch [2/3], Step [1357/1618], Loss: 1.9552, Perplexity: 7.0650
Epoch [2/3], Step [1358/1618], Loss: 2.0372, Perplexity: 7.6695
Epoch [2/3], Step [1359/1618], Loss: 2.0211, Perplexity: 7.5463
Epoch [2/3], Step [1360/1618], Loss: 2.1854, Perplexity: 8.8940
Epoch [2/3], Step [1361/1618], Loss: 2.0374, Perplexity: 7.6705
Epoch [2/3], Step [1362/1618], Loss: 2.2894, Perplexity: 9.8685
Epoch [2/3], Step [1363/1618], Loss: 1.9972, Perplexity: 7.3683
Epoch [2/3], Step [1364/1618], Loss: 1.9196, Perplexity: 6.8181
Epoch [2/3], Step [1365/1618], Loss: 1.9261, Perplexity: 6.8625
Epoch [2/3], Step [1366/1618], Loss: 2.0133, Perplexity: 7.4878
Epoch [2/3], Step [1367/1618], Loss: 1.9210, Perplexity: 6.8279
Epoch [2/3], Step [1368/1618], Loss: 2.0192, Perplexity: 7.5320
Epoch [2/3], Step [1369/1618], Loss: 1.9462, Perplexity: 7.0022
Epoch [2/3], Step [1370/1618], Loss: 2.3430, Perplexity: 10.4125
Epoch [2/3], Step [1371/1618], Loss: 1.9349, Perplexity: 6.9236
Epoch [2/3], Step [1372/1618], Loss: 2.1429, Perplexity: 8.5240
Epoch [2/3], Step [1373/1618], Loss: 1.9032, Perplexity: 6.7074
Epoch [2/3], Step [1374/1618], Loss: 2.0614, Perplexity: 7.8566
Epoch [2/3], Step [1375/1618], Loss: 2.0318, Perplexity: 7.6281
Epoch [2/3], Step [1376/1618], Loss: 2.2753, Perplexity: 9.7310
Epoch [2/3], Step [1377/1618], Loss: 1.9657, Perplexity: 7.1398
Epoch [2/3], Step [1378/1618], Loss: 2.2123, Perplexity: 9.1369
Epoch [2/3], Step [1379/1618], Loss: 2.0017, Perplexity: 7.4020
Epoch [2/3], Step [1380/1618], Loss: 2.0406, Perplexity: 7.6956
Epoch [2/3], Step [1381/1618], Loss: 2.0656, Perplexity: 7.8896
Epoch [2/3], Step [1382/1618], Loss: 2.1457, Perplexity: 8.5482
Epoch [2/3], Step [1383/1618], Loss: 1.9134, Perplexity: 6.7762
Epoch [2/3], Step [1384/1618], Loss: 2.1032, Perplexity: 8.1926
Epoch [2/3], Step [1385/1618], Loss: 2.0010, Perplexity: 7.3961
Epoch [2/3], Step [1386/1618], Loss: 1.9876, Perplexity: 7.2983
Epoch [2/3], Step [1387/1618], Loss: 1.9054, Perplexity: 6.7222
Epoch [2/3], Step [1388/1618], Loss: 1.9061, Perplexity: 6.7271
Epoch [2/3], Step [1389/1618], Loss: 2.1007, Perplexity: 8.1717
Epoch [2/3], Step [1390/1618], Loss: 2.3926, Perplexity: 10.9420
Epoch [2/3], Step [1391/1618], Loss: 2.0762, Perplexity: 7.9744
Epoch [2/3], Step [1392/1618], Loss: 2.1222, Perplexity: 8.3497
Epoch [2/3], Step [1393/1618], Loss: 1.9590, Perplexity: 7.0924
Epoch [2/3], Step [1394/1618], Loss: 2.0185, Perplexity: 7.5270
Epoch [2/3], Step [1395/1618], Loss: 2.0427, Perplexity: 7.7115
Epoch [2/3], Step [1396/1618], Loss: 2.2501, Perplexity: 9.4884
Epoch [2/3], Step [1397/1618], Loss: 2.0074, Perplexity: 7.4437
Epoch [2/3], Step [1398/1618], Loss: 2.1182, Perplexity: 8.3161
Epoch [2/3], Step [1399/1618], Loss: 2.0254, Perplexity: 7.5788
Epoch [2/3], Step [1400/1618], Loss: 1.9626, Perplexity: 7.1177
Epoch [2/3], Step [1401/1618], Loss: 2.0462, Perplexity: 7.7384
Epoch [2/3], Step [1402/1618], Loss: 2.0623, Perplexity: 7.8638
Epoch [2/3], Step [1403/1618], Loss: 1.9528, Perplexity: 7.0485
Epoch [2/3], Step [1404/1618], Loss: 2.0351, Perplexity: 7.6530
Epoch [2/3], Step [1405/1618], Loss: 1.9345, Perplexity: 6.9204
Epoch [2/3], Step [1406/1618], Loss: 2.0544, Perplexity: 7.8019
Epoch [2/3], Step [1407/1618], Loss: 2.0271, Perplexity: 7.5923
Epoch [2/3], Step [1408/1618], Loss: 1.9447, Perplexity: 6.9917
Epoch [2/3], Step [1409/1618], Loss: 2.0011, Perplexity: 7.3975
Epoch [2/3], Step [1410/1618], Loss: 2.0708, Perplexity: 7.9310
Epoch [2/3], Step [1411/1618], Loss: 2.1214, Perplexity: 8.3431
Epoch [2/3], Step [1412/1618], Loss: 1.9538, Perplexity: 7.0554
Epoch [2/3], Step [1413/1618], Loss: 1.9723, Perplexity: 7.1871
Epoch [2/3], Step [1414/1618], Loss: 2.2388, Perplexity: 9.3823
Epoch [2/3], Step [1415/1618], Loss: 2.0578, Perplexity: 7.8285
Epoch [2/3], Step [1416/1618], Loss: 3.2405, Perplexity: 25.5467
Epoch [2/3], Step [1417/1618], Loss: 1.9396, Perplexity: 6.9560
Epoch [2/3], Step [1418/1618], Loss: 1.9562, Perplexity: 7.0724
Epoch [2/3], Step [1419/1618], Loss: 1.8817, Perplexity: 6.5646
Epoch [2/3], Step [1420/1618], Loss: 1.9970, Perplexity: 7.3666
Epoch [2/3], Step [1421/1618], Loss: 2.6791, Perplexity: 14.5713
Epoch [2/3], Step [1422/1618], Loss: 2.8597, Perplexity: 17.4569
Epoch [2/3], Step [1423/1618], Loss: 2.0029, Perplexity: 7.4103
Epoch [2/3], Step [1424/1618], Loss: 2.0066, Perplexity: 7.4383
Epoch [2/3], Step [1425/1618], Loss: 1.9689, Perplexity: 7.1626
Epoch [2/3], Step [1426/1618], Loss: 2.0394, Perplexity: 7.6863
Epoch [2/3], Step [1427/1618], Loss: 2.1067, Perplexity: 8.2213
Epoch [2/3], Step [1428/1618], Loss: 1.8993, Perplexity: 6.6812
Epoch [2/3], Step [1429/1618], Loss: 1.8971, Perplexity: 6.6665
Epoch [2/3], Step [1430/1618], Loss: 2.0414, Perplexity: 7.7011
Epoch [2/3], Step [1431/1618], Loss: 1.9716, Perplexity: 7.1825
Epoch [2/3], Step [1432/1618], Loss: 2.0226, Perplexity: 7.5577
Epoch [2/3], Step [1433/1618], Loss: 2.5454, Perplexity: 12.7479
Epoch [2/3], Step [1434/1618], Loss: 1.9688, Perplexity: 7.1619
Epoch [2/3], Step [1435/1618], Loss: 2.1598, Perplexity: 8.6690
Epoch [2/3], Step [1436/1618], Loss: 1.9835, Perplexity: 7.2679
Epoch [2/3], Step [1437/1618], Loss: 2.3532, Perplexity: 10.5188
Epoch [2/3], Step [1438/1618], Loss: 2.0437, Perplexity: 7.7190
Epoch [2/3], Step [1439/1618], Loss: 1.9918, Perplexity: 7.3289
Epoch [2/3], Step [1440/1618], Loss: 1.9690, Perplexity: 7.1635
Epoch [2/3], Step [1441/1618], Loss: 1.9586, Perplexity: 7.0897
Epoch [2/3], Step [1442/1618], Loss: 1.9256, Perplexity: 6.8589
Epoch [2/3], Step [1443/1618], Loss: 2.2714, Perplexity: 9.6925
Epoch [2/3], Step [1444/1618], Loss: 2.5024, Perplexity: 12.2112
Epoch [2/3], Step [1445/1618], Loss: 2.1986, Perplexity: 9.0124
Epoch [2/3], Step [1446/1618], Loss: 1.9426, Perplexity: 6.9766
Epoch [2/3], Step [1447/1618], Loss: 2.0274, Perplexity: 7.5941
Epoch [2/3], Step [1448/1618], Loss: 2.0949, Perplexity: 8.1247
Epoch [2/3], Step [1449/1618], Loss: 1.8861, Perplexity: 6.5934
Epoch [2/3], Step [1450/1618], Loss: 1.9485, Perplexity: 7.0185
Epoch [2/3], Step [1451/1618], Loss: 2.0474, Perplexity: 7.7475
Epoch [2/3], Step [1452/1618], Loss: 2.0383, Perplexity: 7.6775
Epoch [2/3], Step [1453/1618], Loss: 2.0202, Perplexity: 7.5402
Epoch [2/3], Step [1454/1618], Loss: 2.1403, Perplexity: 8.5020
Epoch [2/3], Step [1455/1618], Loss: 2.1058, Perplexity: 8.2133
Epoch [2/3], Step [1456/1618], Loss: 1.9712, Perplexity: 7.1795
Epoch [2/3], Step [1457/1618], Loss: 2.0525, Perplexity: 7.7872
Epoch [2/3], Step [1458/1618], Loss: 1.9973, Perplexity: 7.3689
Epoch [2/3], Step [1459/1618], Loss: 2.0505, Perplexity: 7.7719
Epoch [2/3], Step [1460/1618], Loss: 2.1820, Perplexity: 8.8637
Epoch [2/3], Step [1461/1618], Loss: 2.6955, Perplexity: 14.8129
Epoch [2/3], Step [1462/1618], Loss: 2.3966, Perplexity: 10.9858
Epoch [2/3], Step [1463/1618], Loss: 2.0092, Perplexity: 7.4574
Epoch [2/3], Step [1464/1618], Loss: 2.3474, Perplexity: 10.4585
Epoch [2/3], Step [1465/1618], Loss: 2.2716, Perplexity: 9.6949
Epoch [2/3], Step [1466/1618], Loss: 2.1820, Perplexity: 8.8642
Epoch [2/3], Step [1467/1618], Loss: 2.0094, Perplexity: 7.4587
Epoch [2/3], Step [1468/1618], Loss: 2.0262, Perplexity: 7.5856
Epoch [2/3], Step [1469/1618], Loss: 2.1581, Perplexity: 8.6543
Epoch [2/3], Step [1470/1618], Loss: 2.0165, Perplexity: 7.5116
Epoch [2/3], Step [1471/1618], Loss: 2.0165, Perplexity: 7.5120
Epoch [2/3], Step [1472/1618], Loss: 1.9594, Perplexity: 7.0948
Epoch [2/3], Step [1473/1618], Loss: 2.0492, Perplexity: 7.7614
Epoch [2/3], Step [1474/1618], Loss: 2.5375, Perplexity: 12.6486
Epoch [2/3], Step [1475/1618], Loss: 1.9804, Perplexity: 7.2453
Epoch [2/3], Step [1476/1618], Loss: 1.9686, Perplexity: 7.1609
Epoch [2/3], Step [1477/1618], Loss: 2.0424, Perplexity: 7.7093
Epoch [2/3], Step [1478/1618], Loss: 1.9895, Perplexity: 7.3118
Epoch [2/3], Step [1479/1618], Loss: 2.0076, Perplexity: 7.4457
Epoch [2/3], Step [1480/1618], Loss: 1.9348, Perplexity: 6.9225
Epoch [2/3], Step [1481/1618], Loss: 2.1213, Perplexity: 8.3423
Epoch [2/3], Step [1482/1618], Loss: 2.0294, Perplexity: 7.6094
Epoch [2/3], Step [1483/1618], Loss: 1.8844, Perplexity: 6.5825
Epoch [2/3], Step [1484/1618], Loss: 1.9520, Perplexity: 7.0428
Epoch [2/3], Step [1485/1618], Loss: 2.5622, Perplexity: 12.9637
Epoch [2/3], Step [1486/1618], Loss: 1.9172, Perplexity: 6.8018
Epoch [2/3], Step [1487/1618], Loss: 1.9586, Perplexity: 7.0892
Epoch [2/3], Step [1488/1618], Loss: 3.0488, Perplexity: 21.0897
Epoch [2/3], Step [1489/1618], Loss: 2.0494, Perplexity: 7.7635
Epoch [2/3], Step [1490/1618], Loss: 1.9727, Perplexity: 7.1900
Epoch [2/3], Step [1491/1618], Loss: 2.0457, Perplexity: 7.7349
Epoch [2/3], Step [1492/1618], Loss: 1.9463, Perplexity: 7.0031
Epoch [2/3], Step [1493/1618], Loss: 1.9210, Perplexity: 6.8275
Epoch [2/3], Step [1494/1618], Loss: 2.0914, Perplexity: 8.0960
Epoch [2/3], Step [1495/1618], Loss: 1.9103, Perplexity: 6.7554
Epoch [2/3], Step [1496/1618], Loss: 2.3801, Perplexity: 10.8060
Epoch [2/3], Step [1497/1618], Loss: 1.9168, Perplexity: 6.7992
Epoch [2/3], Step [1498/1618], Loss: 2.4792, Perplexity: 11.9314
Epoch [2/3], Step [1499/1618], Loss: 1.9669, Perplexity: 7.1488
Epoch [2/3], Step [1500/1618], Loss: 1.9839, Perplexity: 7.2711
Epoch [2/3], Step [1501/1618], Loss: 1.9366, Perplexity: 6.9353
Epoch [2/3], Step [1502/1618], Loss: 1.9789, Perplexity: 7.2348
Epoch [2/3], Step [1503/1618], Loss: 2.2717, Perplexity: 9.6955
Epoch [2/3], Step [1504/1618], Loss: 1.9479, Perplexity: 7.0140
Epoch [2/3], Step [1505/1618], Loss: 2.0990, Perplexity: 8.1578
Epoch [2/3], Step [1506/1618], Loss: 1.9005, Perplexity: 6.6895
Epoch [2/3], Step [1507/1618], Loss: 1.9165, Perplexity: 6.7974
Epoch [2/3], Step [1508/1618], Loss: 1.9851, Perplexity: 7.2795
Epoch [2/3], Step [1509/1618], Loss: 1.9841, Perplexity: 7.2727
Epoch [2/3], Step [1510/1618], Loss: 1.9542, Perplexity: 7.0584
Epoch [2/3], Step [1511/1618], Loss: 2.4499, Perplexity: 11.5869
Epoch [2/3], Step [1512/1618], Loss: 2.0408, Perplexity: 7.6969
Epoch [2/3], Step [1513/1618], Loss: 2.1843, Perplexity: 8.8843
Epoch [2/3], Step [1514/1618], Loss: 2.0602, Perplexity: 7.8472
Epoch [2/3], Step [1515/1618], Loss: 1.9906, Perplexity: 7.3202
Epoch [2/3], Step [1516/1618], Loss: 2.0215, Perplexity: 7.5493
Epoch [2/3], Step [1517/1618], Loss: 1.9280, Perplexity: 6.8759
Epoch [2/3], Step [1518/1618], Loss: 1.9816, Perplexity: 7.2547
Epoch [2/3], Step [1519/1618], Loss: 1.8967, Perplexity: 6.6638
Epoch [2/3], Step [1520/1618], Loss: 1.9584, Perplexity: 7.0878
Epoch [2/3], Step [1521/1618], Loss: 2.1329, Perplexity: 8.4390
Epoch [2/3], Step [1522/1618], Loss: 2.0253, Perplexity: 7.5785
Epoch [2/3], Step [1523/1618], Loss: 2.4896, Perplexity: 12.0561
Epoch [2/3], Step [1524/1618], Loss: 1.9928, Perplexity: 7.3360
Epoch [2/3], Step [1525/1618], Loss: 2.2810, Perplexity: 9.7868
Epoch [2/3], Step [1526/1618], Loss: 2.4874, Perplexity: 12.0295
Epoch [2/3], Step [1527/1618], Loss: 2.0437, Perplexity: 7.7188
Epoch [2/3], Step [1528/1618], Loss: 2.3985, Perplexity: 11.0066
Epoch [2/3], Step [1529/1618], Loss: 1.9855, Perplexity: 7.2827
Epoch [2/3], Step [1530/1618], Loss: 1.9542, Perplexity: 7.0584
Epoch [2/3], Step [1531/1618], Loss: 2.2091, Perplexity: 9.1074
Epoch [2/3], Step [1532/1618], Loss: 2.0730, Perplexity: 7.9487
Epoch [2/3], Step [1533/1618], Loss: 2.0847, Perplexity: 8.0422
Epoch [2/3], Step [1534/1618], Loss: 1.9290, Perplexity: 6.8828
Epoch [2/3], Step [1535/1618], Loss: 2.0983, Perplexity: 8.1526
Epoch [2/3], Step [1536/1618], Loss: 1.9322, Perplexity: 6.9044
Epoch [2/3], Step [1537/1618], Loss: 1.9387, Perplexity: 6.9500
Epoch [2/3], Step [1538/1618], Loss: 2.3783, Perplexity: 10.7865
Epoch [2/3], Step [1539/1618], Loss: 2.2780, Perplexity: 9.7570
Epoch [2/3], Step [1540/1618], Loss: 2.0074, Perplexity: 7.4441
Epoch [2/3], Step [1541/1618], Loss: 2.0134, Perplexity: 7.4886
Epoch [2/3], Step [1542/1618], Loss: 1.8956, Perplexity: 6.6563
Epoch [2/3], Step [1543/1618], Loss: 2.1813, Perplexity: 8.8582
Epoch [2/3], Step [1544/1618], Loss: 2.2207, Perplexity: 9.2141
Epoch [2/3], Step [1545/1618], Loss: 1.9263, Perplexity: 6.8637
Epoch [2/3], Step [1546/1618], Loss: 2.0607, Perplexity: 7.8518
Epoch [2/3], Step [1547/1618], Loss: 1.8931, Perplexity: 6.6399
Epoch [2/3], Step [1548/1618], Loss: 2.2278, Perplexity: 9.2790
Epoch [2/3], Step [1549/1618], Loss: 1.9413, Perplexity: 6.9681
Epoch [2/3], Step [1550/1618], Loss: 1.9809, Perplexity: 7.2491
Epoch [2/3], Step [1551/1618], Loss: 2.0351, Perplexity: 7.6533
Epoch [2/3], Step [1552/1618], Loss: 2.4436, Perplexity: 11.5145
Epoch [2/3], Step [1553/1618], Loss: 2.1070, Perplexity: 8.2239
Epoch [2/3], Step [1554/1618], Loss: 2.1539, Perplexity: 8.6182
Epoch [2/3], Step [1555/1618], Loss: 2.1041, Perplexity: 8.2000
Epoch [2/3], Step [1556/1618], Loss: 2.0204, Perplexity: 7.5413
Epoch [2/3], Step [1557/1618], Loss: 2.1276, Perplexity: 8.3948
Epoch [2/3], Step [1558/1618], Loss: 1.9087, Perplexity: 6.7444
Epoch [2/3], Step [1559/1618], Loss: 1.9729, Perplexity: 7.1916
Epoch [2/3], Step [1560/1618], Loss: 1.9872, Perplexity: 7.2951
Epoch [2/3], Step [1561/1618], Loss: 1.9503, Perplexity: 7.0310
Epoch [2/3], Step [1562/1618], Loss: 1.9390, Perplexity: 6.9515
Epoch [2/3], Step [1563/1618], Loss: 2.0545, Perplexity: 7.8030
Epoch [2/3], Step [1564/1618], Loss: 1.9689, Perplexity: 7.1627
Epoch [2/3], Step [1565/1618], Loss: 2.2415, Perplexity: 9.4074
Epoch [2/3], Step [1566/1618], Loss: 2.0225, Perplexity: 7.5569
Epoch [2/3], Step [1567/1618], Loss: 1.9673, Perplexity: 7.1514
Epoch [2/3], Step [1568/1618], Loss: 2.0182, Perplexity: 7.5245
Epoch [2/3], Step [1569/1618], Loss: 1.9800, Perplexity: 7.2424
Epoch [2/3], Step [1570/1618], Loss: 2.3538, Perplexity: 10.5258
Epoch [2/3], Step [1571/1618], Loss: 1.9238, Perplexity: 6.8471
Epoch [2/3], Step [1572/1618], Loss: 2.0879, Perplexity: 8.0676
Epoch [2/3], Step [1573/1618], Loss: 2.0676, Perplexity: 7.9062
Epoch [2/3], Step [1574/1618], Loss: 1.9572, Perplexity: 7.0797
Epoch [2/3], Step [1575/1618], Loss: 1.9748, Perplexity: 7.2053
Epoch [2/3], Step [1576/1618], Loss: 2.0070, Perplexity: 7.4411
Epoch [2/3], Step [1577/1618], Loss: 2.0517, Perplexity: 7.7808
Epoch [2/3], Step [1578/1618], Loss: 2.0311, Perplexity: 7.6225
Epoch [2/3], Step [1579/1618], Loss: 2.0220, Perplexity: 7.5538
Epoch [2/3], Step [1580/1618], Loss: 2.1429, Perplexity: 8.5243
Epoch [2/3], Step [1581/1618], Loss: 2.1268, Perplexity: 8.3878
Epoch [2/3], Step [1582/1618], Loss: 1.9644, Perplexity: 7.1306
Epoch [2/3], Step [1583/1618], Loss: 2.0674, Perplexity: 7.9044
Epoch [2/3], Step [1584/1618], Loss: 2.0532, Perplexity: 7.7929
Epoch [2/3], Step [1585/1618], Loss: 1.9477, Perplexity: 7.0128
Epoch [2/3], Step [1586/1618], Loss: 2.0213, Perplexity: 7.5484
Epoch [2/3], Step [1587/1618], Loss: 2.5519, Perplexity: 12.8318
Epoch [2/3], Step [1588/1618], Loss: 1.9763, Perplexity: 7.2161
Epoch [2/3], Step [1589/1618], Loss: 1.8803, Perplexity: 6.5554
Epoch [2/3], Step [1590/1618], Loss: 1.9416, Perplexity: 6.9699
Epoch [2/3], Step [1591/1618], Loss: 2.0557, Perplexity: 7.8126
Epoch [2/3], Step [1592/1618], Loss: 2.4781, Perplexity: 11.9190
Epoch [2/3], Step [1593/1618], Loss: 2.3583, Perplexity: 10.5731
Epoch [2/3], Step [1594/1618], Loss: 1.9776, Perplexity: 7.2253
Epoch [2/3], Step [1595/1618], Loss: 2.2737, Perplexity: 9.7150
Epoch [2/3], Step [1596/1618], Loss: 1.9614, Perplexity: 7.1091
Epoch [2/3], Step [1597/1618], Loss: 1.9922, Perplexity: 7.3315
Epoch [2/3], Step [1598/1618], Loss: 2.1046, Perplexity: 8.2040
Epoch [2/3], Step [1599/1618], Loss: 1.8908, Perplexity: 6.6248
Epoch [2/3], Step [1600/1618], Loss: 2.0172, Perplexity: 7.5173
Epoch [2/3], Step [1601/1618], Loss: 2.3169, Perplexity: 10.1442
Epoch [2/3], Step [1602/1618], Loss: 2.0026, Perplexity: 7.4085
Epoch [2/3], Step [1603/1618], Loss: 2.0148, Perplexity: 7.4989
Epoch [2/3], Step [1604/1618], Loss: 2.2115, Perplexity: 9.1295
Epoch [2/3], Step [1605/1618], Loss: 1.9100, Perplexity: 6.7532
Epoch [2/3], Step [1606/1618], Loss: 2.0635, Perplexity: 7.8738
Epoch [2/3], Step [1607/1618], Loss: 1.9972, Perplexity: 7.3683
Epoch [2/3], Step [1608/1618], Loss: 1.9302, Perplexity: 6.8911
Epoch [2/3], Step [1609/1618], Loss: 1.9911, Perplexity: 7.3237
Epoch [2/3], Step [1610/1618], Loss: 1.9603, Perplexity: 7.1017
Epoch [2/3], Step [1611/1618], Loss: 2.0091, Perplexity: 7.4569
Epoch [2/3], Step [1612/1618], Loss: 1.9854, Perplexity: 7.2823
Epoch [2/3], Step [1613/1618], Loss: 1.9589, Perplexity: 7.0913
Epoch [2/3], Step [1614/1618], Loss: 1.9808, Perplexity: 7.2487
Epoch [2/3], Step [1615/1618], Loss: 1.9437, Perplexity: 6.9845
Epoch [2/3], Step [1616/1618], Loss: 2.0662, Perplexity: 7.8951
Epoch [2/3], Step [1617/1618], Loss: 2.4031, Perplexity: 11.0570
Epoch [2/3], Step [1618/1618], Loss: 2.0502, Perplexity: 7.7694
Epoch [3/3], Step [1/1618], Loss: 1.9335, Perplexity: 6.9133
Epoch [3/3], Step [2/1618], Loss: 1.9808, Perplexity: 7.2489
Epoch [3/3], Step [3/1618], Loss: 1.9338, Perplexity: 6.9154
Epoch [3/3], Step [4/1618], Loss: 1.9173, Perplexity: 6.8026
Epoch [3/3], Step [5/1618], Loss: 1.9332, Perplexity: 6.9118
Epoch [3/3], Step [6/1618], Loss: 1.9366, Perplexity: 6.9349
Epoch [3/3], Step [7/1618], Loss: 1.9962, Perplexity: 7.3612
Epoch [3/3], Step [8/1618], Loss: 2.1702, Perplexity: 8.7600
Epoch [3/3], Step [9/1618], Loss: 1.9797, Perplexity: 7.2407
Epoch [3/3], Step [10/1618], Loss: 1.9813, Perplexity: 7.2525
Epoch [3/3], Step [11/1618], Loss: 2.2693, Perplexity: 9.6726
Epoch [3/3], Step [12/1618], Loss: 1.9596, Perplexity: 7.0965
Epoch [3/3], Step [13/1618], Loss: 2.0235, Perplexity: 7.5651
Epoch [3/3], Step [14/1618], Loss: 1.9321, Perplexity: 6.9038
Epoch [3/3], Step [15/1618], Loss: 1.9259, Perplexity: 6.8614
Epoch [3/3], Step [16/1618], Loss: 1.8487, Perplexity: 6.3518
Epoch [3/3], Step [17/1618], Loss: 2.0293, Perplexity: 7.6091
Epoch [3/3], Step [18/1618], Loss: 1.9230, Perplexity: 6.8417
Epoch [3/3], Step [19/1618], Loss: 1.8610, Perplexity: 6.4300
Epoch [3/3], Step [20/1618], Loss: 1.9627, Perplexity: 7.1189
Epoch [3/3], Step [21/1618], Loss: 1.9545, Perplexity: 7.0601
Epoch [3/3], Step [22/1618], Loss: 1.9770, Perplexity: 7.2213
Epoch [3/3], Step [23/1618], Loss: 2.0224, Perplexity: 7.5563
Epoch [3/3], Step [24/1618], Loss: 1.9432, Perplexity: 6.9813
Epoch [3/3], Step [25/1618], Loss: 1.9557, Perplexity: 7.0686
Epoch [3/3], Step [26/1618], Loss: 1.9673, Perplexity: 7.1510
Epoch [3/3], Step [27/1618], Loss: 1.9416, Perplexity: 6.9699
Epoch [3/3], Step [28/1618], Loss: 1.9464, Perplexity: 7.0035
Epoch [3/3], Step [29/1618], Loss: 2.0309, Perplexity: 7.6212
Epoch [3/3], Step [30/1618], Loss: 2.0854, Perplexity: 8.0478
Epoch [3/3], Step [31/1618], Loss: 2.2521, Perplexity: 9.5077
Epoch [3/3], Step [32/1618], Loss: 2.0534, Perplexity: 7.7947
Epoch [3/3], Step [33/1618], Loss: 1.9190, Perplexity: 6.8139
Epoch [3/3], Step [34/1618], Loss: 2.1234, Perplexity: 8.3593
Epoch [3/3], Step [35/1618], Loss: 2.0198, Perplexity: 7.5371
Epoch [3/3], Step [36/1618], Loss: 1.9470, Perplexity: 7.0076
Epoch [3/3], Step [37/1618], Loss: 1.9250, Perplexity: 6.8553
Epoch [3/3], Step [38/1618], Loss: 1.9348, Perplexity: 6.9224
Epoch [3/3], Step [39/1618], Loss: 2.0662, Perplexity: 7.8945
Epoch [3/3], Step [40/1618], Loss: 1.9804, Perplexity: 7.2459
Epoch [3/3], Step [41/1618], Loss: 2.2485, Perplexity: 9.4732
Epoch [3/3], Step [42/1618], Loss: 2.0968, Perplexity: 8.1404
Epoch [3/3], Step [43/1618], Loss: 1.9714, Perplexity: 7.1808
Epoch [3/3], Step [44/1618], Loss: 2.3935, Perplexity: 10.9514
Epoch [3/3], Step [45/1618], Loss: 1.8954, Perplexity: 6.6551
Epoch [3/3], Step [46/1618], Loss: 1.8906, Perplexity: 6.6234
Epoch [3/3], Step [47/1618], Loss: 2.0032, Perplexity: 7.4126
Epoch [3/3], Step [48/1618], Loss: 2.1137, Perplexity: 8.2791
Epoch [3/3], Step [49/1618], Loss: 1.9371, Perplexity: 6.9387
Epoch [3/3], Step [50/1618], Loss: 2.2289, Perplexity: 9.2898
Epoch [3/3], Step [51/1618], Loss: 1.8414, Perplexity: 6.3053
Epoch [3/3], Step [52/1618], Loss: 1.9318, Perplexity: 6.9017
Epoch [3/3], Step [53/1618], Loss: 1.8509, Perplexity: 6.3653
Epoch [3/3], Step [54/1618], Loss: 1.9040, Perplexity: 6.7126
Epoch [3/3], Step [55/1618], Loss: 2.0260, Perplexity: 7.5833
Epoch [3/3], Step [56/1618], Loss: 1.8928, Perplexity: 6.6379
Epoch [3/3], Step [57/1618], Loss: 1.9652, Perplexity: 7.1362
Epoch [3/3], Step [58/1618], Loss: 1.9830, Perplexity: 7.2644
Epoch [3/3], Step [59/1618], Loss: 1.9408, Perplexity: 6.9641
Epoch [3/3], Step [60/1618], Loss: 1.9777, Perplexity: 7.2263
Epoch [3/3], Step [61/1618], Loss: 1.9616, Perplexity: 7.1106
Epoch [3/3], Step [62/1618], Loss: 1.9982, Perplexity: 7.3756
Epoch [3/3], Step [63/1618], Loss: 1.9440, Perplexity: 6.9869
Epoch [3/3], Step [64/1618], Loss: 1.9601, Perplexity: 7.1002
Epoch [3/3], Step [65/1618], Loss: 2.0099, Perplexity: 7.4625
Epoch [3/3], Step [66/1618], Loss: 2.0204, Perplexity: 7.5413
Epoch [3/3], Step [67/1618], Loss: 1.9049, Perplexity: 6.7187
Epoch [3/3], Step [68/1618], Loss: 2.2123, Perplexity: 9.1368
Epoch [3/3], Step [69/1618], Loss: 2.0158, Perplexity: 7.5067
Epoch [3/3], Step [70/1618], Loss: 1.9312, Perplexity: 6.8977
Epoch [3/3], Step [71/1618], Loss: 2.3598, Perplexity: 10.5884
Epoch [3/3], Step [72/1618], Loss: 1.9273, Perplexity: 6.8707
Epoch [3/3], Step [73/1618], Loss: 2.0446, Perplexity: 7.7262
Epoch [3/3], Step [74/1618], Loss: 2.0361, Perplexity: 7.6610
Epoch [3/3], Step [75/1618], Loss: 1.8539, Perplexity: 6.3846
Epoch [3/3], Step [76/1618], Loss: 1.9686, Perplexity: 7.1603
Epoch [3/3], Step [77/1618], Loss: 1.8963, Perplexity: 6.6610
Epoch [3/3], Step [78/1618], Loss: 2.0338, Perplexity: 7.6430
Epoch [3/3], Step [79/1618], Loss: 1.8661, Perplexity: 6.4632
Epoch [3/3], Step [80/1618], Loss: 2.1954, Perplexity: 8.9840
Epoch [3/3], Step [81/1618], Loss: 1.8941, Perplexity: 6.6463
Epoch [3/3], Step [82/1618], Loss: 2.0267, Perplexity: 7.5888
Epoch [3/3], Step [83/1618], Loss: 2.2365, Perplexity: 9.3601
Epoch [3/3], Step [84/1618], Loss: 1.8889, Perplexity: 6.6119
Epoch [3/3], Step [85/1618], Loss: 1.9207, Perplexity: 6.8258
Epoch [3/3], Step [86/1618], Loss: 1.9711, Perplexity: 7.1782
Epoch [3/3], Step [87/1618], Loss: 1.9676, Perplexity: 7.1532
Epoch [3/3], Step [88/1618], Loss: 2.3091, Perplexity: 10.0652
Epoch [3/3], Step [89/1618], Loss: 1.9512, Perplexity: 7.0369
Epoch [3/3], Step [90/1618], Loss: 2.0303, Perplexity: 7.6162
Epoch [3/3], Step [91/1618], Loss: 2.0107, Perplexity: 7.4685
Epoch [3/3], Step [92/1618], Loss: 1.9524, Perplexity: 7.0454
Epoch [3/3], Step [93/1618], Loss: 1.8127, Perplexity: 6.1272
Epoch [3/3], Step [94/1618], Loss: 1.9854, Perplexity: 7.2816
Epoch [3/3], Step [95/1618], Loss: 1.8683, Perplexity: 6.4773
Epoch [3/3], Step [96/1618], Loss: 2.0705, Perplexity: 7.9284
Epoch [3/3], Step [97/1618], Loss: 1.9365, Perplexity: 6.9343
Epoch [3/3], Step [98/1618], Loss: 1.9272, Perplexity: 6.8699
Epoch [3/3], Step [99/1618], Loss: 1.8261, Perplexity: 6.2094
Epoch [3/3], Step [100/1618], Loss: 1.9955, Perplexity: 7.3557
Epoch [3/3], Step [101/1618], Loss: 1.9155, Perplexity: 6.7906
Epoch [3/3], Step [102/1618], Loss: 1.9973, Perplexity: 7.3693
Epoch [3/3], Step [103/1618], Loss: 2.0910, Perplexity: 8.0931
Epoch [3/3], Step [104/1618], Loss: 1.9945, Perplexity: 7.3485
Epoch [3/3], Step [105/1618], Loss: 1.9840, Perplexity: 7.2715
Epoch [3/3], Step [106/1618], Loss: 1.9792, Perplexity: 7.2370
Epoch [3/3], Step [107/1618], Loss: 2.0923, Perplexity: 8.1034
Epoch [3/3], Step [108/1618], Loss: 1.8953, Perplexity: 6.6543
Epoch [3/3], Step [109/1618], Loss: 2.0915, Perplexity: 8.0968
Epoch [3/3], Step [110/1618], Loss: 2.2066, Perplexity: 9.0846
Epoch [3/3], Step [111/1618], Loss: 2.3299, Perplexity: 10.2772
Epoch [3/3], Step [112/1618], Loss: 1.9297, Perplexity: 6.8871
Epoch [3/3], Step [113/1618], Loss: 1.8980, Perplexity: 6.6726
Epoch [3/3], Step [114/1618], Loss: 1.8866, Perplexity: 6.5971
Epoch [3/3], Step [115/1618], Loss: 2.0334, Perplexity: 7.6398
Epoch [3/3], Step [116/1618], Loss: 1.9730, Perplexity: 7.1921
Epoch [3/3], Step [117/1618], Loss: 2.1549, Perplexity: 8.6272
Epoch [3/3], Step [118/1618], Loss: 1.8942, Perplexity: 6.6475
Epoch [3/3], Step [119/1618], Loss: 2.1523, Perplexity: 8.6049
Epoch [3/3], Step [120/1618], Loss: 2.5036, Perplexity: 12.2268
Epoch [3/3], Step [121/1618], Loss: 2.1186, Perplexity: 8.3196
Epoch [3/3], Step [122/1618], Loss: 2.0422, Perplexity: 7.7079
Epoch [3/3], Step [123/1618], Loss: 1.8515, Perplexity: 6.3694
Epoch [3/3], Step [124/1618], Loss: 1.9974, Perplexity: 7.3698
Epoch [3/3], Step [125/1618], Loss: 1.9338, Perplexity: 6.9154
Epoch [3/3], Step [126/1618], Loss: 2.2087, Perplexity: 9.1036
Epoch [3/3], Step [127/1618], Loss: 2.1384, Perplexity: 8.4854
Epoch [3/3], Step [128/1618], Loss: 2.0839, Perplexity: 8.0356
Epoch [3/3], Step [129/1618], Loss: 1.9532, Perplexity: 7.0510
Epoch [3/3], Step [130/1618], Loss: 2.2356, Perplexity: 9.3520
Epoch [3/3], Step [131/1618], Loss: 2.3773, Perplexity: 10.7757
Epoch [3/3], Step [132/1618], Loss: 1.9588, Perplexity: 7.0907
Epoch [3/3], Step [133/1618], Loss: 2.0288, Perplexity: 7.6050
Epoch [3/3], Step [134/1618], Loss: 1.9679, Perplexity: 7.1558
Epoch [3/3], Step [135/1618], Loss: 1.9472, Perplexity: 7.0089
Epoch [3/3], Step [136/1618], Loss: 1.9186, Perplexity: 6.8111
Epoch [3/3], Step [137/1618], Loss: 1.9446, Perplexity: 6.9911
Epoch [3/3], Step [138/1618], Loss: 1.9967, Perplexity: 7.3646
Epoch [3/3], Step [139/1618], Loss: 2.0273, Perplexity: 7.5932
Epoch [3/3], Step [140/1618], Loss: 2.0883, Perplexity: 8.0714
Epoch [3/3], Step [141/1618], Loss: 1.9517, Perplexity: 7.0407
Epoch [3/3], Step [142/1618], Loss: 2.3974, Perplexity: 10.9943
Epoch [3/3], Step [143/1618], Loss: 2.1272, Perplexity: 8.3912
Epoch [3/3], Step [144/1618], Loss: 1.9464, Perplexity: 7.0035
Epoch [3/3], Step [145/1618], Loss: 1.8948, Perplexity: 6.6510
Epoch [3/3], Step [146/1618], Loss: 2.0617, Perplexity: 7.8597
Epoch [3/3], Step [147/1618], Loss: 2.0088, Perplexity: 7.4541
Epoch [3/3], Step [148/1618], Loss: 2.7208, Perplexity: 15.1921
Epoch [3/3], Step [149/1618], Loss: 1.9385, Perplexity: 6.9480
Epoch [3/3], Step [150/1618], Loss: 2.0164, Perplexity: 7.5112
Epoch [3/3], Step [151/1618], Loss: 1.9530, Perplexity: 7.0500
Epoch [3/3], Step [152/1618], Loss: 2.0100, Perplexity: 7.4634
Epoch [3/3], Step [153/1618], Loss: 2.1244, Perplexity: 8.3683
Epoch [3/3], Step [154/1618], Loss: 1.9816, Perplexity: 7.2541
Epoch [3/3], Step [155/1618], Loss: 2.0394, Perplexity: 7.6859
Epoch [3/3], Step [156/1618], Loss: 1.8420, Perplexity: 6.3094
Epoch [3/3], Step [157/1618], Loss: 2.1947, Perplexity: 8.9769
Epoch [3/3], Step [158/1618], Loss: 1.9005, Perplexity: 6.6890
Epoch [3/3], Step [159/1618], Loss: 2.3380, Perplexity: 10.3606
Epoch [3/3], Step [160/1618], Loss: 2.2663, Perplexity: 9.6441
Epoch [3/3], Step [161/1618], Loss: 1.9719, Perplexity: 7.1844
Epoch [3/3], Step [162/1618], Loss: 2.0822, Perplexity: 8.0222
Epoch [3/3], Step [163/1618], Loss: 3.1304, Perplexity: 22.8837
Epoch [3/3], Step [164/1618], Loss: 2.0295, Perplexity: 7.6101
Epoch [3/3], Step [165/1618], Loss: 1.9735, Perplexity: 7.1955
Epoch [3/3], Step [166/1618], Loss: 2.0238, Perplexity: 7.5669
Epoch [3/3], Step [167/1618], Loss: 2.4178, Perplexity: 11.2210
Epoch [3/3], Step [168/1618], Loss: 2.0279, Perplexity: 7.5982
Epoch [3/3], Step [169/1618], Loss: 2.0867, Perplexity: 8.0579
Epoch [3/3], Step [170/1618], Loss: 1.9832, Perplexity: 7.2656
Epoch [3/3], Step [171/1618], Loss: 1.9657, Perplexity: 7.1400
Epoch [3/3], Step [172/1618], Loss: 2.0752, Perplexity: 7.9658
Epoch [3/3], Step [173/1618], Loss: 1.9505, Perplexity: 7.0323
Epoch [3/3], Step [174/1618], Loss: 2.0524, Perplexity: 7.7869
Epoch [3/3], Step [175/1618], Loss: 1.9601, Perplexity: 7.0997
Epoch [3/3], Step [176/1618], Loss: 2.1021, Perplexity: 8.1836
Epoch [3/3], Step [177/1618], Loss: 1.9685, Perplexity: 7.1596
Epoch [3/3], Step [178/1618], Loss: 1.8976, Perplexity: 6.6700
Epoch [3/3], Step [179/1618], Loss: 1.9990, Perplexity: 7.3817
Epoch [3/3], Step [180/1618], Loss: 1.9175, Perplexity: 6.8043
Epoch [3/3], Step [181/1618], Loss: 2.9505, Perplexity: 19.1162
Epoch [3/3], Step [182/1618], Loss: 1.9594, Perplexity: 7.0947
Epoch [3/3], Step [183/1618], Loss: 1.8889, Perplexity: 6.6118
Epoch [3/3], Step [184/1618], Loss: 1.8981, Perplexity: 6.6732
Epoch [3/3], Step [185/1618], Loss: 2.1143, Perplexity: 8.2840
Epoch [3/3], Step [186/1618], Loss: 1.8437, Perplexity: 6.3196
Epoch [3/3], Step [187/1618], Loss: 2.2592, Perplexity: 9.5755
Epoch [3/3], Step [188/1618], Loss: 1.9496, Perplexity: 7.0260
Epoch [3/3], Step [189/1618], Loss: 1.9908, Perplexity: 7.3215
Epoch [3/3], Step [190/1618], Loss: 2.2364, Perplexity: 9.3600
Epoch [3/3], Step [191/1618], Loss: 2.1352, Perplexity: 8.4587
Epoch [3/3], Step [192/1618], Loss: 2.0071, Perplexity: 7.4418
Epoch [3/3], Step [193/1618], Loss: 1.9141, Perplexity: 6.7809
Epoch [3/3], Step [194/1618], Loss: 2.0502, Perplexity: 7.7694
Epoch [3/3], Step [195/1618], Loss: 1.9689, Perplexity: 7.1624
Epoch [3/3], Step [196/1618], Loss: 1.9550, Perplexity: 7.0637
Epoch [3/3], Step [197/1618], Loss: 1.9086, Perplexity: 6.7438
Epoch [3/3], Step [198/1618], Loss: 2.2694, Perplexity: 9.6739
Epoch [3/3], Step [199/1618], Loss: 1.9530, Perplexity: 7.0501
Epoch [3/3], Step [200/1618], Loss: 2.3776, Perplexity: 10.7785
Epoch [3/3], Step [201/1618], Loss: 1.9066, Perplexity: 6.7300
Epoch [3/3], Step [202/1618], Loss: 2.8068, Perplexity: 16.5565
Epoch [3/3], Step [203/1618], Loss: 2.0246, Perplexity: 7.5734
Epoch [3/3], Step [204/1618], Loss: 2.0214, Perplexity: 7.5491
Epoch [3/3], Step [205/1618], Loss: 1.9437, Perplexity: 6.9845
Epoch [3/3], Step [206/1618], Loss: 1.9354, Perplexity: 6.9271
Epoch [3/3], Step [207/1618], Loss: 2.0356, Perplexity: 7.6566
Epoch [3/3], Step [208/1618], Loss: 1.9769, Perplexity: 7.2203
Epoch [3/3], Step [209/1618], Loss: 2.0623, Perplexity: 7.8637
Epoch [3/3], Step [210/1618], Loss: 1.9816, Perplexity: 7.2543
Epoch [3/3], Step [211/1618], Loss: 2.2706, Perplexity: 9.6847
Epoch [3/3], Step [212/1618], Loss: 2.0700, Perplexity: 7.9246
Epoch [3/3], Step [213/1618], Loss: 2.0162, Perplexity: 7.5095
Epoch [3/3], Step [214/1618], Loss: 1.9032, Perplexity: 6.7072
Epoch [3/3], Step [215/1618], Loss: 1.9162, Perplexity: 6.7954
Epoch [3/3], Step [216/1618], Loss: 2.1053, Perplexity: 8.2096
Epoch [3/3], Step [217/1618], Loss: 2.1416, Perplexity: 8.5129
Epoch [3/3], Step [218/1618], Loss: 2.0387, Perplexity: 7.6809
Epoch [3/3], Step [219/1618], Loss: 2.1495, Perplexity: 8.5805
Epoch [3/3], Step [220/1618], Loss: 1.9484, Perplexity: 7.0171
Epoch [3/3], Step [221/1618], Loss: 1.9794, Perplexity: 7.2380
Epoch [3/3], Step [222/1618], Loss: 2.7427, Perplexity: 15.5283
Epoch [3/3], Step [223/1618], Loss: 1.9478, Perplexity: 7.0135
Epoch [3/3], Step [224/1618], Loss: 1.9043, Perplexity: 6.7148
Epoch [3/3], Step [225/1618], Loss: 1.9313, Perplexity: 6.8986
Epoch [3/3], Step [226/1618], Loss: 1.8907, Perplexity: 6.6240
Epoch [3/3], Step [227/1618], Loss: 2.5993, Perplexity: 13.4544
Epoch [3/3], Step [228/1618], Loss: 1.9831, Perplexity: 7.2653
Epoch [3/3], Step [229/1618], Loss: 1.9348, Perplexity: 6.9226
Epoch [3/3], Step [230/1618], Loss: 1.8762, Perplexity: 6.5288
Epoch [3/3], Step [231/1618], Loss: 2.4110, Perplexity: 11.1455
Epoch [3/3], Step [232/1618], Loss: 1.9128, Perplexity: 6.7720
Epoch [3/3], Step [233/1618], Loss: 2.1730, Perplexity: 8.7845
Epoch [3/3], Step [234/1618], Loss: 1.9849, Perplexity: 7.2780
Epoch [3/3], Step [235/1618], Loss: 2.0681, Perplexity: 7.9095
Epoch [3/3], Step [236/1618], Loss: 1.8993, Perplexity: 6.6815
Epoch [3/3], Step [237/1618], Loss: 1.9955, Perplexity: 7.3555
Epoch [3/3], Step [238/1618], Loss: 1.9044, Perplexity: 6.7151
Epoch [3/3], Step [239/1618], Loss: 1.9827, Perplexity: 7.2623
Epoch [3/3], Step [240/1618], Loss: 3.3090, Perplexity: 27.3574
Epoch [3/3], Step [241/1618], Loss: 2.0580, Perplexity: 7.8300
Epoch [3/3], Step [242/1618], Loss: 1.9836, Perplexity: 7.2687
Epoch [3/3], Step [243/1618], Loss: 1.9891, Perplexity: 7.3093
Epoch [3/3], Step [244/1618], Loss: 2.0816, Perplexity: 8.0175
Epoch [3/3], Step [245/1618], Loss: 2.2745, Perplexity: 9.7226
Epoch [3/3], Step [246/1618], Loss: 2.0359, Perplexity: 7.6595
Epoch [3/3], Step [247/1618], Loss: 2.0799, Perplexity: 8.0033
Epoch [3/3], Step [248/1618], Loss: 2.2411, Perplexity: 9.4036
Epoch [3/3], Step [249/1618], Loss: 2.3086, Perplexity: 10.0608
Epoch [3/3], Step [250/1618], Loss: 2.0839, Perplexity: 8.0354
Epoch [3/3], Step [251/1618], Loss: 2.0691, Perplexity: 7.9174
Epoch [3/3], Step [252/1618], Loss: 2.1001, Perplexity: 8.1671
Epoch [3/3], Step [253/1618], Loss: 1.9737, Perplexity: 7.1972
Epoch [3/3], Step [254/1618], Loss: 1.9876, Perplexity: 7.2983
Epoch [3/3], Step [255/1618], Loss: 2.1064, Perplexity: 8.2183
Epoch [3/3], Step [256/1618], Loss: 1.9978, Perplexity: 7.3727
Epoch [3/3], Step [257/1618], Loss: 1.8816, Perplexity: 6.5640
Epoch [3/3], Step [258/1618], Loss: 1.9679, Perplexity: 7.1557
Epoch [3/3], Step [259/1618], Loss: 2.0055, Perplexity: 7.4302
Epoch [3/3], Step [260/1618], Loss: 2.0624, Perplexity: 7.8649
Epoch [3/3], Step [261/1618], Loss: 1.9012, Perplexity: 6.6938
Epoch [3/3], Step [262/1618], Loss: 1.9747, Perplexity: 7.2048
Epoch [3/3], Step [263/1618], Loss: 2.0461, Perplexity: 7.7375
Epoch [3/3], Step [264/1618], Loss: 2.1784, Perplexity: 8.8319
Epoch [3/3], Step [265/1618], Loss: 1.9239, Perplexity: 6.8475
Epoch [3/3], Step [266/1618], Loss: 1.9092, Perplexity: 6.7478
Epoch [3/3], Step [267/1618], Loss: 2.1291, Perplexity: 8.4075
Epoch [3/3], Step [268/1618], Loss: 2.2992, Perplexity: 9.9658
Epoch [3/3], Step [269/1618], Loss: 2.0517, Perplexity: 7.7809
Epoch [3/3], Step [270/1618], Loss: 1.9736, Perplexity: 7.1964
Epoch [3/3], Step [271/1618], Loss: 1.9671, Perplexity: 7.1496
Epoch [3/3], Step [272/1618], Loss: 1.9449, Perplexity: 6.9926
Epoch [3/3], Step [273/1618], Loss: 2.0153, Perplexity: 7.5026
Epoch [3/3], Step [274/1618], Loss: 2.2669, Perplexity: 9.6490
Epoch [3/3], Step [275/1618], Loss: 2.0563, Perplexity: 7.8173
Epoch [3/3], Step [276/1618], Loss: 1.9373, Perplexity: 6.9401
Epoch [3/3], Step [277/1618], Loss: 1.9685, Perplexity: 7.1599
Epoch [3/3], Step [278/1618], Loss: 1.9252, Perplexity: 6.8567
Epoch [3/3], Step [279/1618], Loss: 1.8702, Perplexity: 6.4896
Epoch [3/3], Step [280/1618], Loss: 1.9042, Perplexity: 6.7141
Epoch [3/3], Step [281/1618], Loss: 2.0138, Perplexity: 7.4916
Epoch [3/3], Step [282/1618], Loss: 1.9083, Perplexity: 6.7419
Epoch [3/3], Step [283/1618], Loss: 2.0530, Perplexity: 7.7909
Epoch [3/3], Step [284/1618], Loss: 1.9434, Perplexity: 6.9824
Epoch [3/3], Step [285/1618], Loss: 1.8995, Perplexity: 6.6827
Epoch [3/3], Step [286/1618], Loss: 1.9549, Perplexity: 7.0634
Epoch [3/3], Step [287/1618], Loss: 1.9617, Perplexity: 7.1111
Epoch [3/3], Step [288/1618], Loss: 2.0072, Perplexity: 7.4425
Epoch [3/3], Step [289/1618], Loss: 2.2978, Perplexity: 9.9524
Epoch [3/3], Step [290/1618], Loss: 1.9301, Perplexity: 6.8904
Epoch [3/3], Step [291/1618], Loss: 1.9926, Perplexity: 7.3346
Epoch [3/3], Step [292/1618], Loss: 2.0425, Perplexity: 7.7102
Epoch [3/3], Step [293/1618], Loss: 2.1588, Perplexity: 8.6607
Epoch [3/3], Step [294/1618], Loss: 2.1489, Perplexity: 8.5751
Epoch [3/3], Step [295/1618], Loss: 3.0217, Perplexity: 20.5265
Epoch [3/3], Step [296/1618], Loss: 2.3733, Perplexity: 10.7327
Epoch [3/3], Step [297/1618], Loss: 1.9189, Perplexity: 6.8132
Epoch [3/3], Step [298/1618], Loss: 2.1228, Perplexity: 8.3547
Epoch [3/3], Step [299/1618], Loss: 2.2330, Perplexity: 9.3280
Epoch [3/3], Step [300/1618], Loss: 1.8922, Perplexity: 6.6342
Epoch [3/3], Step [301/1618], Loss: 1.9719, Perplexity: 7.1843
Epoch [3/3], Step [302/1618], Loss: 2.0179, Perplexity: 7.5222
Epoch [3/3], Step [303/1618], Loss: 1.9139, Perplexity: 6.7792
Epoch [3/3], Step [304/1618], Loss: 1.9785, Perplexity: 7.2318
Epoch [3/3], Step [305/1618], Loss: 2.1667, Perplexity: 8.7291
Epoch [3/3], Step [306/1618], Loss: 2.1145, Perplexity: 8.2853
Epoch [3/3], Step [307/1618], Loss: 1.9135, Perplexity: 6.7765
Epoch [3/3], Step [308/1618], Loss: 1.9011, Perplexity: 6.6931
Epoch [3/3], Step [309/1618], Loss: 2.2311, Perplexity: 9.3100
Epoch [3/3], Step [310/1618], Loss: 1.8384, Perplexity: 6.2866
Epoch [3/3], Step [311/1618], Loss: 1.9473, Perplexity: 7.0096
Epoch [3/3], Step [312/1618], Loss: 1.8833, Perplexity: 6.5753
Epoch [3/3], Step [313/1618], Loss: 2.2507, Perplexity: 9.4944
Epoch [3/3], Step [314/1618], Loss: 2.0130, Perplexity: 7.4856
Epoch [3/3], Step [315/1618], Loss: 2.0279, Perplexity: 7.5984
Epoch [3/3], Step [316/1618], Loss: 1.9192, Perplexity: 6.8156
Epoch [3/3], Step [317/1618], Loss: 1.9969, Perplexity: 7.3662
Epoch [3/3], Step [318/1618], Loss: 2.0796, Perplexity: 8.0011
Epoch [3/3], Step [319/1618], Loss: 1.8464, Perplexity: 6.3371
Epoch [3/3], Step [320/1618], Loss: 1.9113, Perplexity: 6.7620
Epoch [3/3], Step [321/1618], Loss: 2.2025, Perplexity: 9.0475
Epoch [3/3], Step [322/1618], Loss: 2.4904, Perplexity: 12.0657
Epoch [3/3], Step [323/1618], Loss: 2.0553, Perplexity: 7.8089
Epoch [3/3], Step [324/1618], Loss: 2.0133, Perplexity: 7.4883
Epoch [3/3], Step [325/1618], Loss: 1.8455, Perplexity: 6.3311
Epoch [3/3], Step [326/1618], Loss: 1.9249, Perplexity: 6.8542
Epoch [3/3], Step [327/1618], Loss: 2.0687, Perplexity: 7.9145
Epoch [3/3], Step [328/1618], Loss: 1.8619, Perplexity: 6.4361
Epoch [3/3], Step [329/1618], Loss: 1.9693, Perplexity: 7.1658
Epoch [3/3], Step [330/1618], Loss: 1.9634, Perplexity: 7.1234
Epoch [3/3], Step [331/1618], Loss: 1.9319, Perplexity: 6.9023
Epoch [3/3], Step [332/1618], Loss: 1.8819, Perplexity: 6.5661
Epoch [3/3], Step [333/1618], Loss: 2.0679, Perplexity: 7.9080
Epoch [3/3], Step [334/1618], Loss: 1.9487, Perplexity: 7.0197
Epoch [3/3], Step [335/1618], Loss: 1.8889, Perplexity: 6.6120
Epoch [3/3], Step [336/1618], Loss: 1.9609, Perplexity: 7.1058
Epoch [3/3], Step [337/1618], Loss: 2.0126, Perplexity: 7.4829
Epoch [3/3], Step [338/1618], Loss: 1.9109, Perplexity: 6.7593
Epoch [3/3], Step [339/1618], Loss: 1.9951, Perplexity: 7.3527
Epoch [3/3], Step [340/1618], Loss: 2.0729, Perplexity: 7.9480
Epoch [3/3], Step [341/1618], Loss: 2.0286, Perplexity: 7.6034
Epoch [3/3], Step [342/1618], Loss: 1.9853, Perplexity: 7.2810
Epoch [3/3], Step [343/1618], Loss: 1.7824, Perplexity: 5.9443
Epoch [3/3], Step [344/1618], Loss: 3.5202, Perplexity: 33.7911
Epoch [3/3], Step [345/1618], Loss: 1.9841, Perplexity: 7.2723
Epoch [3/3], Step [346/1618], Loss: 2.1399, Perplexity: 8.4983
Epoch [3/3], Step [347/1618], Loss: 1.9858, Perplexity: 7.2846
Epoch [3/3], Step [348/1618], Loss: 2.0420, Perplexity: 7.7063
Epoch [3/3], Step [349/1618], Loss: 1.9594, Perplexity: 7.0948
Epoch [3/3], Step [350/1618], Loss: 2.0769, Perplexity: 7.9795
Epoch [3/3], Step [351/1618], Loss: 1.9310, Perplexity: 6.8966
Epoch [3/3], Step [352/1618], Loss: 1.9929, Perplexity: 7.3366
Epoch [3/3], Step [353/1618], Loss: 2.0043, Perplexity: 7.4206
Epoch [3/3], Step [354/1618], Loss: 1.8945, Perplexity: 6.6495
Epoch [3/3], Step [355/1618], Loss: 2.2018, Perplexity: 9.0414
Epoch [3/3], Step [356/1618], Loss: 2.0209, Perplexity: 7.5450
Epoch [3/3], Step [357/1618], Loss: 1.9945, Perplexity: 7.3488
Epoch [3/3], Step [358/1618], Loss: 1.9682, Perplexity: 7.1579
Epoch [3/3], Step [359/1618], Loss: 1.9124, Perplexity: 6.7693
Epoch [3/3], Step [360/1618], Loss: 1.8943, Perplexity: 6.6479
Epoch [3/3], Step [361/1618], Loss: 2.1393, Perplexity: 8.4939
Epoch [3/3], Step [362/1618], Loss: 1.8888, Perplexity: 6.6117
Epoch [3/3], Step [363/1618], Loss: 1.8637, Perplexity: 6.4477
Epoch [3/3], Step [364/1618], Loss: 2.2258, Perplexity: 9.2607
Epoch [3/3], Step [365/1618], Loss: 1.9918, Perplexity: 7.3287
Epoch [3/3], Step [366/1618], Loss: 1.9670, Perplexity: 7.1492
Epoch [3/3], Step [367/1618], Loss: 1.9288, Perplexity: 6.8815
Epoch [3/3], Step [368/1618], Loss: 2.0171, Perplexity: 7.5162
Epoch [3/3], Step [369/1618], Loss: 1.8732, Perplexity: 6.5091
Epoch [3/3], Step [370/1618], Loss: 1.9802, Perplexity: 7.2442
Epoch [3/3], Step [371/1618], Loss: 1.9204, Perplexity: 6.8239
Epoch [3/3], Step [372/1618], Loss: 1.9922, Perplexity: 7.3317
Epoch [3/3], Step [373/1618], Loss: 1.9880, Perplexity: 7.3006
Epoch [3/3], Step [374/1618], Loss: 2.1184, Perplexity: 8.3181
Epoch [3/3], Step [375/1618], Loss: 2.0676, Perplexity: 7.9056
Epoch [3/3], Step [376/1618], Loss: 1.9316, Perplexity: 6.9005
Epoch [3/3], Step [377/1618], Loss: 1.9376, Perplexity: 6.9422
Epoch [3/3], Step [378/1618], Loss: 1.9652, Perplexity: 7.1365
Epoch [3/3], Step [379/1618], Loss: 1.9269, Perplexity: 6.8684
Epoch [3/3], Step [380/1618], Loss: 1.9227, Perplexity: 6.8393
Epoch [3/3], Step [381/1618], Loss: 1.9124, Perplexity: 6.7696
Epoch [3/3], Step [382/1618], Loss: 3.1660, Perplexity: 23.7123
Epoch [3/3], Step [383/1618], Loss: 2.3945, Perplexity: 10.9633
Epoch [3/3], Step [384/1618], Loss: 1.9289, Perplexity: 6.8821
Epoch [3/3], Step [385/1618], Loss: 1.9220, Perplexity: 6.8343
Epoch [3/3], Step [386/1618], Loss: 1.9555, Perplexity: 7.0676
Epoch [3/3], Step [387/1618], Loss: 1.8995, Perplexity: 6.6827
Epoch [3/3], Step [388/1618], Loss: 2.0110, Perplexity: 7.4707
Epoch [3/3], Step [389/1618], Loss: 2.0768, Perplexity: 7.9793
Epoch [3/3], Step [390/1618], Loss: 2.0012, Perplexity: 7.3980
Epoch [3/3], Step [391/1618], Loss: 2.3423, Perplexity: 10.4049
Epoch [3/3], Step [392/1618], Loss: 2.0121, Perplexity: 7.4791
Epoch [3/3], Step [393/1618], Loss: 1.9086, Perplexity: 6.7435
Epoch [3/3], Step [394/1618], Loss: 1.9874, Perplexity: 7.2964
Epoch [3/3], Step [395/1618], Loss: 2.1414, Perplexity: 8.5117
Epoch [3/3], Step [396/1618], Loss: 1.9847, Perplexity: 7.2766
Epoch [3/3], Step [397/1618], Loss: 1.9583, Perplexity: 7.0871
Epoch [3/3], Step [398/1618], Loss: 2.0786, Perplexity: 7.9931
Epoch [3/3], Step [399/1618], Loss: 2.5340, Perplexity: 12.6044
Epoch [3/3], Step [400/1618], Loss: 1.8858, Perplexity: 6.5918
Epoch [3/3], Step [401/1618], Loss: 1.9553, Perplexity: 7.0659
Epoch [3/3], Step [402/1618], Loss: 2.2885, Perplexity: 9.8605
Epoch [3/3], Step [403/1618], Loss: 1.9614, Perplexity: 7.1094
Epoch [3/3], Step [404/1618], Loss: 2.0766, Perplexity: 7.9769
Epoch [3/3], Step [405/1618], Loss: 1.9565, Perplexity: 7.0748
Epoch [3/3], Step [406/1618], Loss: 1.8898, Perplexity: 6.6180
Epoch [3/3], Step [407/1618], Loss: 2.1773, Perplexity: 8.8225
Epoch [3/3], Step [408/1618], Loss: 2.0269, Perplexity: 7.5907
Epoch [3/3], Step [409/1618], Loss: 1.7949, Perplexity: 6.0188
Epoch [3/3], Step [410/1618], Loss: 1.9151, Perplexity: 6.7875
Epoch [3/3], Step [411/1618], Loss: 1.9566, Perplexity: 7.0751
Epoch [3/3], Step [412/1618], Loss: 1.9316, Perplexity: 6.9008
Epoch [3/3], Step [413/1618], Loss: 1.8881, Perplexity: 6.6065
Epoch [3/3], Step [414/1618], Loss: 1.9063, Perplexity: 6.7281
Epoch [3/3], Step [415/1618], Loss: 1.8928, Perplexity: 6.6381
Epoch [3/3], Step [416/1618], Loss: 1.9747, Perplexity: 7.2042
Epoch [3/3], Step [417/1618], Loss: 1.8887, Perplexity: 6.6109
Epoch [3/3], Step [418/1618], Loss: 2.5301, Perplexity: 12.5549
Epoch [3/3], Step [419/1618], Loss: 1.9332, Perplexity: 6.9118
Epoch [3/3], Step [420/1618], Loss: 2.0365, Perplexity: 7.6640
Epoch [3/3], Step [421/1618], Loss: 1.9510, Perplexity: 7.0360
Epoch [3/3], Step [422/1618], Loss: 2.4139, Perplexity: 11.1771
Epoch [3/3], Step [423/1618], Loss: 1.9173, Perplexity: 6.8029
Epoch [3/3], Step [424/1618], Loss: 1.9879, Perplexity: 7.3001
Epoch [3/3], Step [425/1618], Loss: 1.8698, Perplexity: 6.4868
Epoch [3/3], Step [426/1618], Loss: 2.1750, Perplexity: 8.8022
Epoch [3/3], Step [427/1618], Loss: 1.8525, Perplexity: 6.3759
Epoch [3/3], Step [428/1618], Loss: 1.9878, Perplexity: 7.2996
Epoch [3/3], Step [429/1618], Loss: 2.2512, Perplexity: 9.4993
Epoch [3/3], Step [430/1618], Loss: 1.9371, Perplexity: 6.9387
Epoch [3/3], Step [431/1618], Loss: 2.2856, Perplexity: 9.8311
Epoch [3/3], Step [432/1618], Loss: 1.9701, Perplexity: 7.1715
Epoch [3/3], Step [433/1618], Loss: 2.1868, Perplexity: 8.9070
Epoch [3/3], Step [434/1618], Loss: 1.9339, Perplexity: 6.9166
Epoch [3/3], Step [435/1618], Loss: 1.8778, Perplexity: 6.5389
Epoch [3/3], Step [436/1618], Loss: 2.0134, Perplexity: 7.4885
Epoch [3/3], Step [437/1618], Loss: 1.9405, Perplexity: 6.9626
Epoch [3/3], Step [438/1618], Loss: 1.9241, Perplexity: 6.8492
Epoch [3/3], Step [439/1618], Loss: 1.9919, Perplexity: 7.3296
Epoch [3/3], Step [440/1618], Loss: 2.0497, Perplexity: 7.7656
Epoch [3/3], Step [441/1618], Loss: 1.8992, Perplexity: 6.6803
Epoch [3/3], Step [442/1618], Loss: 1.9363, Perplexity: 6.9330
Epoch [3/3], Step [443/1618], Loss: 1.8721, Perplexity: 6.5020
Epoch [3/3], Step [444/1618], Loss: 1.9551, Perplexity: 7.0649
Epoch [3/3], Step [445/1618], Loss: 1.9761, Perplexity: 7.2143
Epoch [3/3], Step [446/1618], Loss: 1.9334, Perplexity: 6.9127
Epoch [3/3], Step [447/1618], Loss: 1.9380, Perplexity: 6.9449
Epoch [3/3], Step [448/1618], Loss: 2.0296, Perplexity: 7.6111
Epoch [3/3], Step [449/1618], Loss: 2.2068, Perplexity: 9.0868
Epoch [3/3], Step [450/1618], Loss: 2.0028, Perplexity: 7.4098
Epoch [3/3], Step [451/1618], Loss: 2.0596, Perplexity: 7.8431
Epoch [3/3], Step [452/1618], Loss: 1.9101, Perplexity: 6.7534
Epoch [3/3], Step [453/1618], Loss: 2.0085, Perplexity: 7.4518
Epoch [3/3], Step [454/1618], Loss: 2.0457, Perplexity: 7.7344
Epoch [3/3], Step [455/1618], Loss: 2.3632, Perplexity: 10.6245
Epoch [3/3], Step [456/1618], Loss: 1.7819, Perplexity: 5.9412
Epoch [3/3], Step [457/1618], Loss: 1.8886, Perplexity: 6.6101
Epoch [3/3], Step [458/1618], Loss: 1.8751, Perplexity: 6.5217
Epoch [3/3], Step [459/1618], Loss: 2.1469, Perplexity: 8.5580
Epoch [3/3], Step [460/1618], Loss: 1.9029, Perplexity: 6.7056
Epoch [3/3], Step [461/1618], Loss: 2.1093, Perplexity: 8.2429
Epoch [3/3], Step [462/1618], Loss: 1.8341, Perplexity: 6.2592
Epoch [3/3], Step [463/1618], Loss: 1.9584, Perplexity: 7.0883
Epoch [3/3], Step [464/1618], Loss: 1.8593, Perplexity: 6.4196
Epoch [3/3], Step [465/1618], Loss: 1.8517, Perplexity: 6.3709
Epoch [3/3], Step [466/1618], Loss: 1.9108, Perplexity: 6.7583
Epoch [3/3], Step [467/1618], Loss: 1.9612, Perplexity: 7.1082
Epoch [3/3], Step [468/1618], Loss: 2.0172, Perplexity: 7.5170
Epoch [3/3], Step [469/1618], Loss: 1.9686, Perplexity: 7.1606
Epoch [3/3], Step [470/1618], Loss: 2.3210, Perplexity: 10.1863
Epoch [3/3], Step [471/1618], Loss: 1.9020, Perplexity: 6.6991
Epoch [3/3], Step [472/1618], Loss: 2.0468, Perplexity: 7.7430
Epoch [3/3], Step [473/1618], Loss: 1.9994, Perplexity: 7.3848
Epoch [3/3], Step [474/1618], Loss: 1.9274, Perplexity: 6.8719
Epoch [3/3], Step [475/1618], Loss: 1.8661, Perplexity: 6.4627
Epoch [3/3], Step [476/1618], Loss: 1.9545, Perplexity: 7.0606
Epoch [3/3], Step [477/1618], Loss: 1.9136, Perplexity: 6.7776
Epoch [3/3], Step [478/1618], Loss: 1.9426, Perplexity: 6.9769
Epoch [3/3], Step [479/1618], Loss: 1.8723, Perplexity: 6.5030
Epoch [3/3], Step [480/1618], Loss: 2.0048, Perplexity: 7.4244
Epoch [3/3], Step [481/1618], Loss: 2.0152, Perplexity: 7.5022
Epoch [3/3], Step [482/1618], Loss: 1.8733, Perplexity: 6.5099
Epoch [3/3], Step [483/1618], Loss: 1.8720, Perplexity: 6.5012
Epoch [3/3], Step [484/1618], Loss: 2.1569, Perplexity: 8.6445
Epoch [3/3], Step [485/1618], Loss: 1.8997, Perplexity: 6.6840
Epoch [3/3], Step [486/1618], Loss: 1.9166, Perplexity: 6.7979
Epoch [3/3], Step [487/1618], Loss: 1.8528, Perplexity: 6.3779
Epoch [3/3], Step [488/1618], Loss: 2.0558, Perplexity: 7.8130
Epoch [3/3], Step [489/1618], Loss: 1.9152, Perplexity: 6.7883
Epoch [3/3], Step [490/1618], Loss: 1.8795, Perplexity: 6.5505
Epoch [3/3], Step [491/1618], Loss: 1.9025, Perplexity: 6.7029
Epoch [3/3], Step [492/1618], Loss: 1.9492, Perplexity: 7.0228
Epoch [3/3], Step [493/1618], Loss: 1.8907, Perplexity: 6.6237
Epoch [3/3], Step [494/1618], Loss: 1.9947, Perplexity: 7.3500
Epoch [3/3], Step [495/1618], Loss: 1.8840, Perplexity: 6.5795
Epoch [3/3], Step [496/1618], Loss: 2.0098, Perplexity: 7.4621
Epoch [3/3], Step [497/1618], Loss: 1.9181, Perplexity: 6.8080
Epoch [3/3], Step [498/1618], Loss: 1.9306, Perplexity: 6.8939
Epoch [3/3], Step [499/1618], Loss: 1.9079, Perplexity: 6.7390
Epoch [3/3], Step [500/1618], Loss: 2.4523, Perplexity: 11.6151
Epoch [3/3], Step [501/1618], Loss: 2.1397, Perplexity: 8.4966
Epoch [3/3], Step [502/1618], Loss: 1.9394, Perplexity: 6.9549
Epoch [3/3], Step [503/1618], Loss: 1.8956, Perplexity: 6.6568
Epoch [3/3], Step [504/1618], Loss: 1.9443, Perplexity: 6.9886
Epoch [3/3], Step [505/1618], Loss: 2.2077, Perplexity: 9.0949
Epoch [3/3], Step [506/1618], Loss: 1.9558, Perplexity: 7.0698
Epoch [3/3], Step [507/1618], Loss: 2.0353, Perplexity: 7.6546
Epoch [3/3], Step [508/1618], Loss: 1.9366, Perplexity: 6.9353
Epoch [3/3], Step [509/1618], Loss: 1.9078, Perplexity: 6.7380
Epoch [3/3], Step [510/1618], Loss: 2.0899, Perplexity: 8.0839
Epoch [3/3], Step [511/1618], Loss: 1.9381, Perplexity: 6.9456
Epoch [3/3], Step [512/1618], Loss: 1.9985, Perplexity: 7.3779
Epoch [3/3], Step [513/1618], Loss: 1.9993, Perplexity: 7.3841
Epoch [3/3], Step [514/1618], Loss: 2.0583, Perplexity: 7.8328
Epoch [3/3], Step [515/1618], Loss: 2.1636, Perplexity: 8.7022
Epoch [3/3], Step [516/1618], Loss: 2.1876, Perplexity: 8.9140
Epoch [3/3], Step [517/1618], Loss: 1.9023, Perplexity: 6.7011
Epoch [3/3], Step [518/1618], Loss: 2.1241, Perplexity: 8.3653
Epoch [3/3], Step [519/1618], Loss: 1.9571, Perplexity: 7.0785
Epoch [3/3], Step [520/1618], Loss: 2.0150, Perplexity: 7.5005
Epoch [3/3], Step [521/1618], Loss: 1.9037, Perplexity: 6.7107
Epoch [3/3], Step [522/1618], Loss: 1.9543, Perplexity: 7.0588
Epoch [3/3], Step [523/1618], Loss: 1.9430, Perplexity: 6.9794
Epoch [3/3], Step [524/1618], Loss: 1.9250, Perplexity: 6.8553
Epoch [3/3], Step [525/1618], Loss: 2.0044, Perplexity: 7.4218
Epoch [3/3], Step [526/1618], Loss: 1.8729, Perplexity: 6.5070
Epoch [3/3], Step [527/1618], Loss: 2.0038, Perplexity: 7.4169
Epoch [3/3], Step [528/1618], Loss: 2.1067, Perplexity: 8.2208
Epoch [3/3], Step [529/1618], Loss: 2.1798, Perplexity: 8.8447
Epoch [3/3], Step [530/1618], Loss: 1.9327, Perplexity: 6.9082
Epoch [3/3], Step [531/1618], Loss: 1.9246, Perplexity: 6.8524
Epoch [3/3], Step [532/1618], Loss: 1.8685, Perplexity: 6.4789
Epoch [3/3], Step [533/1618], Loss: 2.0165, Perplexity: 7.5123
Epoch [3/3], Step [534/1618], Loss: 1.9228, Perplexity: 6.8399
Epoch [3/3], Step [535/1618], Loss: 2.1191, Perplexity: 8.3234
Epoch [3/3], Step [536/1618], Loss: 1.8574, Perplexity: 6.4069
Epoch [3/3], Step [537/1618], Loss: 1.9213, Perplexity: 6.8302
Epoch [3/3], Step [538/1618], Loss: 1.8826, Perplexity: 6.5705
Epoch [3/3], Step [539/1618], Loss: 2.1706, Perplexity: 8.7638
Epoch [3/3], Step [540/1618], Loss: 1.9760, Perplexity: 7.2138
Epoch [3/3], Step [541/1618], Loss: 2.0316, Perplexity: 7.6265
Epoch [3/3], Step [542/1618], Loss: 1.9943, Perplexity: 7.3473
Epoch [3/3], Step [543/1618], Loss: 1.8843, Perplexity: 6.5818
Epoch [3/3], Step [544/1618], Loss: 1.9428, Perplexity: 6.9781
Epoch [3/3], Step [545/1618], Loss: 1.9138, Perplexity: 6.7787
Epoch [3/3], Step [546/1618], Loss: 1.8830, Perplexity: 6.5729
Epoch [3/3], Step [547/1618], Loss: 1.9231, Perplexity: 6.8421
Epoch [3/3], Step [548/1618], Loss: 2.7096, Perplexity: 15.0234
Epoch [3/3], Step [549/1618], Loss: 1.9699, Perplexity: 7.1698
Epoch [3/3], Step [550/1618], Loss: 1.9484, Perplexity: 7.0177
Epoch [3/3], Step [551/1618], Loss: 1.9855, Perplexity: 7.2824
Epoch [3/3], Step [552/1618], Loss: 1.9497, Perplexity: 7.0268
Epoch [3/3], Step [553/1618], Loss: 1.9236, Perplexity: 6.8458
Epoch [3/3], Step [554/1618], Loss: 1.9632, Perplexity: 7.1219
Epoch [3/3], Step [555/1618], Loss: 1.9308, Perplexity: 6.8949
Epoch [3/3], Step [556/1618], Loss: 2.1548, Perplexity: 8.6261
Epoch [3/3], Step [557/1618], Loss: 1.8839, Perplexity: 6.5794
Epoch [3/3], Step [558/1618], Loss: 1.9231, Perplexity: 6.8423
Epoch [3/3], Step [559/1618], Loss: 1.8843, Perplexity: 6.5819
Epoch [3/3], Step [560/1618], Loss: 1.8788, Perplexity: 6.5458
Epoch [3/3], Step [561/1618], Loss: 2.1159, Perplexity: 8.2970
Epoch [3/3], Step [562/1618], Loss: 1.9248, Perplexity: 6.8537
Epoch [3/3], Step [563/1618], Loss: 1.8843, Perplexity: 6.5814
Epoch [3/3], Step [564/1618], Loss: 2.1082, Perplexity: 8.2334
Epoch [3/3], Step [565/1618], Loss: 1.8868, Perplexity: 6.5981
Epoch [3/3], Step [566/1618], Loss: 1.9717, Perplexity: 7.1829
Epoch [3/3], Step [567/1618], Loss: 2.0133, Perplexity: 7.4878
Epoch [3/3], Step [568/1618], Loss: 2.0457, Perplexity: 7.7347
Epoch [3/3], Step [569/1618], Loss: 1.9201, Perplexity: 6.8217
Epoch [3/3], Step [570/1618], Loss: 1.9167, Perplexity: 6.7988
Epoch [3/3], Step [571/1618], Loss: 2.2603, Perplexity: 9.5861
Epoch [3/3], Step [572/1618], Loss: 1.9709, Perplexity: 7.1774
Epoch [3/3], Step [573/1618], Loss: 2.1083, Perplexity: 8.2343
Epoch [3/3], Step [574/1618], Loss: 1.8818, Perplexity: 6.5654
Epoch [3/3], Step [575/1618], Loss: 2.1102, Perplexity: 8.2502
Epoch [3/3], Step [576/1618], Loss: 1.9792, Perplexity: 7.2368
Epoch [3/3], Step [577/1618], Loss: 1.8537, Perplexity: 6.3836
Epoch [3/3], Step [578/1618], Loss: 1.8900, Perplexity: 6.6192
Epoch [3/3], Step [579/1618], Loss: 1.8848, Perplexity: 6.5852
Epoch [3/3], Step [580/1618], Loss: 1.8583, Perplexity: 6.4131
Epoch [3/3], Step [581/1618], Loss: 1.9903, Perplexity: 7.3175
Epoch [3/3], Step [582/1618], Loss: 2.0265, Perplexity: 7.5879
Epoch [3/3], Step [583/1618], Loss: 1.9719, Perplexity: 7.1846
Epoch [3/3], Step [584/1618], Loss: 1.8651, Perplexity: 6.4563
Epoch [3/3], Step [585/1618], Loss: 1.9137, Perplexity: 6.7779
Epoch [3/3], Step [586/1618], Loss: 1.8521, Perplexity: 6.3729
Epoch [3/3], Step [587/1618], Loss: 2.0142, Perplexity: 7.4946
Epoch [3/3], Step [588/1618], Loss: 2.2984, Perplexity: 9.9581
Epoch [3/3], Step [589/1618], Loss: 2.0153, Perplexity: 7.5030
Epoch [3/3], Step [590/1618], Loss: 2.0071, Perplexity: 7.4415
Epoch [3/3], Step [591/1618], Loss: 1.8919, Perplexity: 6.6320
Epoch [3/3], Step [592/1618], Loss: 1.9513, Perplexity: 7.0379
Epoch [3/3], Step [593/1618], Loss: 2.3559, Perplexity: 10.5476
Epoch [3/3], Step [594/1618], Loss: 1.9088, Perplexity: 6.7452
Epoch [3/3], Step [595/1618], Loss: 1.9176, Perplexity: 6.8044
Epoch [3/3], Step [596/1618], Loss: 1.9200, Perplexity: 6.8208
Epoch [3/3], Step [597/1618], Loss: 2.1095, Perplexity: 8.2440
Epoch [3/3], Step [598/1618], Loss: 1.8432, Perplexity: 6.3168
Epoch [3/3], Step [599/1618], Loss: 2.1171, Perplexity: 8.3074
Epoch [3/3], Step [600/1618], Loss: 2.0897, Perplexity: 8.0822
Epoch [3/3], Step [601/1618], Loss: 1.9955, Perplexity: 7.3557
Epoch [3/3], Step [602/1618], Loss: 1.9021, Perplexity: 6.7002
Epoch [3/3], Step [603/1618], Loss: 1.9318, Perplexity: 6.9020
Epoch [3/3], Step [604/1618], Loss: 1.9055, Perplexity: 6.7230
Epoch [3/3], Step [605/1618], Loss: 1.9335, Perplexity: 6.9139
Epoch [3/3], Step [606/1618], Loss: 2.0411, Perplexity: 7.6991
Epoch [3/3], Step [607/1618], Loss: 1.8982, Perplexity: 6.6741
Epoch [3/3], Step [608/1618], Loss: 1.8662, Perplexity: 6.4635
Epoch [3/3], Step [609/1618], Loss: 1.9862, Perplexity: 7.2875
Epoch [3/3], Step [610/1618], Loss: 1.9083, Perplexity: 6.7414
Epoch [3/3], Step [611/1618], Loss: 1.8300, Perplexity: 6.2341
Epoch [3/3], Step [612/1618], Loss: 1.9488, Perplexity: 7.0206
Epoch [3/3], Step [613/1618], Loss: 1.9113, Perplexity: 6.7619
Epoch [3/3], Step [614/1618], Loss: 3.2681, Perplexity: 26.2621
Epoch [3/3], Step [615/1618], Loss: 1.9212, Perplexity: 6.8291
Epoch [3/3], Step [616/1618], Loss: 1.8553, Perplexity: 6.3937
Epoch [3/3], Step [617/1618], Loss: 1.9733, Perplexity: 7.1940
Epoch [3/3], Step [618/1618], Loss: 1.9515, Perplexity: 7.0392
Epoch [3/3], Step [619/1618], Loss: 1.9330, Perplexity: 6.9100
Epoch [3/3], Step [620/1618], Loss: 1.9043, Perplexity: 6.7150
Epoch [3/3], Step [621/1618], Loss: 2.1600, Perplexity: 8.6711
Epoch [3/3], Step [622/1618], Loss: 2.0104, Perplexity: 7.4663
Epoch [3/3], Step [623/1618], Loss: 1.9926, Perplexity: 7.3343
Epoch [3/3], Step [624/1618], Loss: 1.9541, Perplexity: 7.0575
Epoch [3/3], Step [625/1618], Loss: 2.2178, Perplexity: 9.1868
Epoch [3/3], Step [626/1618], Loss: 1.8977, Perplexity: 6.6703
Epoch [3/3], Step [627/1618], Loss: 2.0136, Perplexity: 7.4901
Epoch [3/3], Step [628/1618], Loss: 2.1893, Perplexity: 8.9293
Epoch [3/3], Step [629/1618], Loss: 2.0585, Perplexity: 7.8340
Epoch [3/3], Step [630/1618], Loss: 2.0929, Perplexity: 8.1087
Epoch [3/3], Step [631/1618], Loss: 2.4122, Perplexity: 11.1581
Epoch [3/3], Step [632/1618], Loss: 1.8758, Perplexity: 6.5262
Epoch [3/3], Step [633/1618], Loss: 2.0119, Perplexity: 7.4775
Epoch [3/3], Step [634/1618], Loss: 2.0021, Perplexity: 7.4046
Epoch [3/3], Step [635/1618], Loss: 1.9065, Perplexity: 6.7298
Epoch [3/3], Step [636/1618], Loss: 2.1287, Perplexity: 8.4038
Epoch [3/3], Step [637/1618], Loss: 2.2885, Perplexity: 9.8604
Epoch [3/3], Step [638/1618], Loss: 1.9228, Perplexity: 6.8401
Epoch [3/3], Step [639/1618], Loss: 2.4085, Perplexity: 11.1175
Epoch [3/3], Step [640/1618], Loss: 2.3157, Perplexity: 10.1322
Epoch [3/3], Step [641/1618], Loss: 2.0237, Perplexity: 7.5665
Epoch [3/3], Step [642/1618], Loss: 1.8817, Perplexity: 6.5649
Epoch [3/3], Step [643/1618], Loss: 2.0045, Perplexity: 7.4226
Epoch [3/3], Step [644/1618], Loss: 2.3918, Perplexity: 10.9335
Epoch [3/3], Step [645/1618], Loss: 1.9798, Perplexity: 7.2412
Epoch [3/3], Step [646/1618], Loss: 1.8597, Perplexity: 6.4216
Epoch [3/3], Step [647/1618], Loss: 2.0889, Perplexity: 8.0758
Epoch [3/3], Step [648/1618], Loss: 2.1415, Perplexity: 8.5123
Epoch [3/3], Step [649/1618], Loss: 1.9354, Perplexity: 6.9269
Epoch [3/3], Step [650/1618], Loss: 1.9433, Perplexity: 6.9819
Epoch [3/3], Step [651/1618], Loss: 2.0959, Perplexity: 8.1327
Epoch [3/3], Step [652/1618], Loss: 2.0535, Perplexity: 7.7953
Epoch [3/3], Step [653/1618], Loss: 2.0085, Perplexity: 7.4522
Epoch [3/3], Step [654/1618], Loss: 1.9376, Perplexity: 6.9422
Epoch [3/3], Step [655/1618], Loss: 2.2177, Perplexity: 9.1860
Epoch [3/3], Step [656/1618], Loss: 2.1454, Perplexity: 8.5456
Epoch [3/3], Step [657/1618], Loss: 2.0228, Perplexity: 7.5592
Epoch [3/3], Step [658/1618], Loss: 1.9645, Perplexity: 7.1315
Epoch [3/3], Step [659/1618], Loss: 1.9680, Perplexity: 7.1565
Epoch [3/3], Step [660/1618], Loss: 1.9820, Perplexity: 7.2572
Epoch [3/3], Step [661/1618], Loss: 2.4867, Perplexity: 12.0214
Epoch [3/3], Step [662/1618], Loss: 1.9456, Perplexity: 6.9977
Epoch [3/3], Step [663/1618], Loss: 1.9507, Perplexity: 7.0336
Epoch [3/3], Step [664/1618], Loss: 1.9817, Perplexity: 7.2554
Epoch [3/3], Step [665/1618], Loss: 1.9592, Perplexity: 7.0938
Epoch [3/3], Step [666/1618], Loss: 1.9326, Perplexity: 6.9074
Epoch [3/3], Step [667/1618], Loss: 1.8772, Perplexity: 6.5352
Epoch [3/3], Step [668/1618], Loss: 1.9171, Perplexity: 6.8014
Epoch [3/3], Step [669/1618], Loss: 1.9500, Perplexity: 7.0288
Epoch [3/3], Step [670/1618], Loss: 1.9699, Perplexity: 7.1701
Epoch [3/3], Step [671/1618], Loss: 1.9304, Perplexity: 6.8925
Epoch [3/3], Step [672/1618], Loss: 2.0077, Perplexity: 7.4461
Epoch [3/3], Step [673/1618], Loss: 1.8329, Perplexity: 6.2520
Epoch [3/3], Step [674/1618], Loss: 1.9175, Perplexity: 6.8039
Epoch [3/3], Step [675/1618], Loss: 1.9735, Perplexity: 7.1956
Epoch [3/3], Step [676/1618], Loss: 2.0991, Perplexity: 8.1591
Epoch [3/3], Step [677/1618], Loss: 2.2758, Perplexity: 9.7352
Epoch [3/3], Step [678/1618], Loss: 2.0383, Perplexity: 7.6775
Epoch [3/3], Step [679/1618], Loss: 2.0074, Perplexity: 7.4440
Epoch [3/3], Step [680/1618], Loss: 1.8218, Perplexity: 6.1830
Epoch [3/3], Step [681/1618], Loss: 2.0535, Perplexity: 7.7950
Epoch [3/3], Step [682/1618], Loss: 3.0040, Perplexity: 20.1669
Epoch [3/3], Step [683/1618], Loss: 2.0049, Perplexity: 7.4250
Epoch [3/3], Step [684/1618], Loss: 1.9001, Perplexity: 6.6865
Epoch [3/3], Step [685/1618], Loss: 2.0438, Perplexity: 7.7201
Epoch [3/3], Step [686/1618], Loss: 1.9158, Perplexity: 6.7922
Epoch [3/3], Step [687/1618], Loss: 1.9326, Perplexity: 6.9073
Epoch [3/3], Step [688/1618], Loss: 2.0773, Perplexity: 7.9830
Epoch [3/3], Step [689/1618], Loss: 1.8803, Perplexity: 6.5558
Epoch [3/3], Step [690/1618], Loss: 1.8673, Perplexity: 6.4708
Epoch [3/3], Step [691/1618], Loss: 1.9118, Perplexity: 6.7653
Epoch [3/3], Step [692/1618], Loss: 1.9001, Perplexity: 6.6865
Epoch [3/3], Step [693/1618], Loss: 1.9931, Perplexity: 7.3379
Epoch [3/3], Step [694/1618], Loss: 1.9505, Perplexity: 7.0323
Epoch [3/3], Step [695/1618], Loss: 1.8815, Perplexity: 6.5635
Epoch [3/3], Step [696/1618], Loss: 2.6150, Perplexity: 13.6667
Epoch [3/3], Step [697/1618], Loss: 1.9078, Perplexity: 6.7383
Epoch [3/3], Step [698/1618], Loss: 2.0066, Perplexity: 7.4383
Epoch [3/3], Step [699/1618], Loss: 1.9673, Perplexity: 7.1515
Epoch [3/3], Step [700/1618], Loss: 2.1413, Perplexity: 8.5103
Epoch [3/3], Step [701/1618], Loss: 2.0451, Perplexity: 7.7297
Epoch [3/3], Step [702/1618], Loss: 1.8681, Perplexity: 6.4763
Epoch [3/3], Step [703/1618], Loss: 1.8825, Perplexity: 6.5701
Epoch [3/3], Step [704/1618], Loss: 1.9411, Perplexity: 6.9665
Epoch [3/3], Step [705/1618], Loss: 1.8877, Perplexity: 6.6040
Epoch [3/3], Step [706/1618], Loss: 1.8229, Perplexity: 6.1899
Epoch [3/3], Step [707/1618], Loss: 2.0507, Perplexity: 7.7731
Epoch [3/3], Step [708/1618], Loss: 2.2604, Perplexity: 9.5871
Epoch [3/3], Step [709/1618], Loss: 1.9077, Perplexity: 6.7379
Epoch [3/3], Step [710/1618], Loss: 1.8964, Perplexity: 6.6618
Epoch [3/3], Step [711/1618], Loss: 1.9872, Perplexity: 7.2951
Epoch [3/3], Step [712/1618], Loss: 2.0134, Perplexity: 7.4887
Epoch [3/3], Step [713/1618], Loss: 2.3568, Perplexity: 10.5571
Epoch [3/3], Step [714/1618], Loss: 1.9863, Perplexity: 7.2886
Epoch [3/3], Step [715/1618], Loss: 2.1062, Perplexity: 8.2170
Epoch [3/3], Step [716/1618], Loss: 2.0362, Perplexity: 7.6615
Epoch [3/3], Step [717/1618], Loss: 2.0034, Perplexity: 7.4145
Epoch [3/3], Step [718/1618], Loss: 1.7970, Perplexity: 6.0314
Epoch [3/3], Step [719/1618], Loss: 1.9851, Perplexity: 7.2800
Epoch [3/3], Step [720/1618], Loss: 2.0533, Perplexity: 7.7934
Epoch [3/3], Step [721/1618], Loss: 1.9680, Perplexity: 7.1560
Epoch [3/3], Step [722/1618], Loss: 2.2640, Perplexity: 9.6218
Epoch [3/3], Step [723/1618], Loss: 2.0231, Perplexity: 7.5615
Epoch [3/3], Step [724/1618], Loss: 2.2507, Perplexity: 9.4947
Epoch [3/3], Step [725/1618], Loss: 1.9753, Perplexity: 7.2088
Epoch [3/3], Step [726/1618], Loss: 1.9127, Perplexity: 6.7716
Epoch [3/3], Step [727/1618], Loss: 2.1068, Perplexity: 8.2215
Epoch [3/3], Step [728/1618], Loss: 1.9318, Perplexity: 6.9022
Epoch [3/3], Step [729/1618], Loss: 2.2365, Perplexity: 9.3605
Epoch [3/3], Step [730/1618], Loss: 1.9505, Perplexity: 7.0325
Epoch [3/3], Step [731/1618], Loss: 1.9382, Perplexity: 6.9460
Epoch [3/3], Step [732/1618], Loss: 1.8758, Perplexity: 6.5262
Epoch [3/3], Step [733/1618], Loss: 1.9812, Perplexity: 7.2518
Epoch [3/3], Step [734/1618], Loss: 1.8653, Perplexity: 6.4579
Epoch [3/3], Step [735/1618], Loss: 2.2624, Perplexity: 9.6061
Epoch [3/3], Step [736/1618], Loss: 1.9450, Perplexity: 6.9936
Epoch [3/3], Step [737/1618], Loss: 2.1772, Perplexity: 8.8219
Epoch [3/3], Step [738/1618], Loss: 1.9777, Perplexity: 7.2259
Epoch [3/3], Step [739/1618], Loss: 1.9340, Perplexity: 6.9169
Epoch [3/3], Step [740/1618], Loss: 1.7851, Perplexity: 5.9604
Epoch [3/3], Step [741/1618], Loss: 1.9848, Perplexity: 7.2775
Epoch [3/3], Step [742/1618], Loss: 2.1249, Perplexity: 8.3717
Epoch [3/3], Step [743/1618], Loss: 2.2149, Perplexity: 9.1605
Epoch [3/3], Step [744/1618], Loss: 1.9350, Perplexity: 6.9238
Epoch [3/3], Step [745/1618], Loss: 2.0617, Perplexity: 7.8596
Epoch [3/3], Step [746/1618], Loss: 1.8575, Perplexity: 6.4079
Epoch [3/3], Step [747/1618], Loss: 1.9603, Perplexity: 7.1017
Epoch [3/3], Step [748/1618], Loss: 1.9670, Perplexity: 7.1494
Epoch [3/3], Step [749/1618], Loss: 1.8689, Perplexity: 6.4811
Epoch [3/3], Step [750/1618], Loss: 1.9254, Perplexity: 6.8578
Epoch [3/3], Step [751/1618], Loss: 1.9399, Perplexity: 6.9579
Epoch [3/3], Step [752/1618], Loss: 1.9402, Perplexity: 6.9603
Epoch [3/3], Step [753/1618], Loss: 1.8661, Perplexity: 6.4632
Epoch [3/3], Step [754/1618], Loss: 1.9308, Perplexity: 6.8953
Epoch [3/3], Step [755/1618], Loss: 1.8108, Perplexity: 6.1150
Epoch [3/3], Step [756/1618], Loss: 1.9351, Perplexity: 6.9248
Epoch [3/3], Step [757/1618], Loss: 1.9967, Perplexity: 7.3645
Epoch [3/3], Step [758/1618], Loss: 1.9555, Perplexity: 7.0673
Epoch [3/3], Step [759/1618], Loss: 1.9826, Perplexity: 7.2618
Epoch [3/3], Step [760/1618], Loss: 1.8628, Perplexity: 6.4417
Epoch [3/3], Step [761/1618], Loss: 1.9500, Perplexity: 7.0287
Epoch [3/3], Step [762/1618], Loss: 1.8718, Perplexity: 6.4999
Epoch [3/3], Step [763/1618], Loss: 1.9498, Perplexity: 7.0270
Epoch [3/3], Step [764/1618], Loss: 1.8992, Perplexity: 6.6808
Epoch [3/3], Step [765/1618], Loss: 2.3127, Perplexity: 10.1016
Epoch [3/3], Step [766/1618], Loss: 1.8835, Perplexity: 6.5765
Epoch [3/3], Step [767/1618], Loss: 1.9343, Perplexity: 6.9192
Epoch [3/3], Step [768/1618], Loss: 2.1963, Perplexity: 8.9915
Epoch [3/3], Step [769/1618], Loss: 2.2562, Perplexity: 9.5468
Epoch [3/3], Step [770/1618], Loss: 1.9870, Perplexity: 7.2938
Epoch [3/3], Step [771/1618], Loss: 1.8588, Perplexity: 6.4158
Epoch [3/3], Step [772/1618], Loss: 2.3175, Perplexity: 10.1507
Epoch [3/3], Step [773/1618], Loss: 1.9783, Perplexity: 7.2308
Epoch [3/3], Step [774/1618], Loss: 2.1539, Perplexity: 8.6181
Epoch [3/3], Step [775/1618], Loss: 1.9524, Perplexity: 7.0454
Epoch [3/3], Step [776/1618], Loss: 2.1078, Perplexity: 8.2298
Epoch [3/3], Step [777/1618], Loss: 1.8861, Perplexity: 6.5937
Epoch [3/3], Step [778/1618], Loss: 2.0659, Perplexity: 7.8925
Epoch [3/3], Step [779/1618], Loss: 2.1295, Perplexity: 8.4104
Epoch [3/3], Step [780/1618], Loss: 1.9095, Perplexity: 6.7496
Epoch [3/3], Step [781/1618], Loss: 1.9210, Perplexity: 6.8280
Epoch [3/3], Step [782/1618], Loss: 1.9858, Perplexity: 7.2846
Epoch [3/3], Step [783/1618], Loss: 2.0962, Perplexity: 8.1353
Epoch [3/3], Step [784/1618], Loss: 1.8976, Perplexity: 6.6702
Epoch [3/3], Step [785/1618], Loss: 1.9501, Perplexity: 7.0294
Epoch [3/3], Step [786/1618], Loss: 1.9284, Perplexity: 6.8784
Epoch [3/3], Step [787/1618], Loss: 1.8598, Perplexity: 6.4222
Epoch [3/3], Step [788/1618], Loss: 2.1791, Perplexity: 8.8380
Epoch [3/3], Step [789/1618], Loss: 2.1267, Perplexity: 8.3875
Epoch [3/3], Step [790/1618], Loss: 1.9325, Perplexity: 6.9064
Epoch [3/3], Step [791/1618], Loss: 1.9821, Perplexity: 7.2578
Epoch [3/3], Step [792/1618], Loss: 1.8654, Perplexity: 6.4584
Epoch [3/3], Step [793/1618], Loss: 1.9361, Perplexity: 6.9320
Epoch [3/3], Step [794/1618], Loss: 1.8726, Perplexity: 6.5053
Epoch [3/3], Step [795/1618], Loss: 1.9971, Perplexity: 7.3680
Epoch [3/3], Step [796/1618], Loss: 1.8728, Perplexity: 6.5065
Epoch [3/3], Step [797/1618], Loss: 2.0998, Perplexity: 8.1643
Epoch [3/3], Step [798/1618], Loss: 2.0032, Perplexity: 7.4125
Epoch [3/3], Step [799/1618], Loss: 2.0935, Perplexity: 8.1129
Epoch [3/3], Step [800/1618], Loss: 1.9404, Perplexity: 6.9616
Epoch [3/3], Step [801/1618], Loss: 1.9175, Perplexity: 6.8041
Epoch [3/3], Step [802/1618], Loss: 1.9933, Perplexity: 7.3397
Epoch [3/3], Step [803/1618], Loss: 1.8927, Perplexity: 6.6372
Epoch [3/3], Step [804/1618], Loss: 2.1583, Perplexity: 8.6561
Epoch [3/3], Step [805/1618], Loss: 1.8777, Perplexity: 6.5386
Epoch [3/3], Step [806/1618], Loss: 1.9415, Perplexity: 6.9690
Epoch [3/3], Step [807/1618], Loss: 2.1006, Perplexity: 8.1712
Epoch [3/3], Step [808/1618], Loss: 1.8841, Perplexity: 6.5805
Epoch [3/3], Step [809/1618], Loss: 1.9654, Perplexity: 7.1381
Epoch [3/3], Step [810/1618], Loss: 2.0685, Perplexity: 7.9129
Epoch [3/3], Step [811/1618], Loss: 2.1832, Perplexity: 8.8745
Epoch [3/3], Step [812/1618], Loss: 1.8576, Perplexity: 6.4085
Epoch [3/3], Step [813/1618], Loss: 1.9270, Perplexity: 6.8687
Epoch [3/3], Step [814/1618], Loss: 1.9349, Perplexity: 6.9232
Epoch [3/3], Step [815/1618], Loss: 1.9900, Perplexity: 7.3155
Epoch [3/3], Step [816/1618], Loss: 1.8716, Perplexity: 6.4988
Epoch [3/3], Step [817/1618], Loss: 1.9822, Perplexity: 7.2584
Epoch [3/3], Step [818/1618], Loss: 2.0189, Perplexity: 7.5298
Epoch [3/3], Step [819/1618], Loss: 1.9362, Perplexity: 6.9325
Epoch [3/3], Step [820/1618], Loss: 1.8644, Perplexity: 6.4520
Epoch [3/3], Step [821/1618], Loss: 1.9215, Perplexity: 6.8314
Epoch [3/3], Step [822/1618], Loss: 1.9491, Perplexity: 7.0222
Epoch [3/3], Step [823/1618], Loss: 1.9941, Perplexity: 7.3458
Epoch [3/3], Step [824/1618], Loss: 1.9789, Perplexity: 7.2346
Epoch [3/3], Step [825/1618], Loss: 1.8863, Perplexity: 6.5948
Epoch [3/3], Step [826/1618], Loss: 1.9382, Perplexity: 6.9461
Epoch [3/3], Step [827/1618], Loss: 1.9345, Perplexity: 6.9204
Epoch [3/3], Step [828/1618], Loss: 1.8252, Perplexity: 6.2038
Epoch [3/3], Step [829/1618], Loss: 1.8817, Perplexity: 6.5644
Epoch [3/3], Step [830/1618], Loss: 1.9733, Perplexity: 7.1947
Epoch [3/3], Step [831/1618], Loss: 1.8116, Perplexity: 6.1205
Epoch [3/3], Step [832/1618], Loss: 1.9572, Perplexity: 7.0794
Epoch [3/3], Step [833/1618], Loss: 2.0779, Perplexity: 7.9877
Epoch [3/3], Step [834/1618], Loss: 1.8043, Perplexity: 6.0757
Epoch [3/3], Step [835/1618], Loss: 1.9317, Perplexity: 6.9014
Epoch [3/3], Step [836/1618], Loss: 1.9863, Perplexity: 7.2883
Epoch [3/3], Step [837/1618], Loss: 2.5464, Perplexity: 12.7615
Epoch [3/3], Step [838/1618], Loss: 1.9921, Perplexity: 7.3310
Epoch [3/3], Step [839/1618], Loss: 2.1982, Perplexity: 9.0084
Epoch [3/3], Step [840/1618], Loss: 2.0168, Perplexity: 7.5139
Epoch [3/3], Step [841/1618], Loss: 1.9844, Perplexity: 7.2749
Epoch [3/3], Step [842/1618], Loss: 2.0111, Perplexity: 7.4717
Epoch [3/3], Step [843/1618], Loss: 1.8140, Perplexity: 6.1347
Epoch [3/3], Step [844/1618], Loss: 1.9350, Perplexity: 6.9239
Epoch [3/3], Step [845/1618], Loss: 1.9304, Perplexity: 6.8923
Epoch [3/3], Step [846/1618], Loss: 2.4944, Perplexity: 12.1148
Epoch [3/3], Step [847/1618], Loss: 1.9514, Perplexity: 7.0384
Epoch [3/3], Step [848/1618], Loss: 1.8007, Perplexity: 6.0540
Epoch [3/3], Step [849/1618], Loss: 1.8578, Perplexity: 6.4098
Epoch [3/3], Step [850/1618], Loss: 2.0039, Perplexity: 7.4180
Epoch [3/3], Step [851/1618], Loss: 1.9266, Perplexity: 6.8659
Epoch [3/3], Step [852/1618], Loss: 2.1010, Perplexity: 8.1747
Epoch [3/3], Step [853/1618], Loss: 1.9019, Perplexity: 6.6983
Epoch [3/3], Step [854/1618], Loss: 3.1379, Perplexity: 23.0543
Epoch [3/3], Step [855/1618], Loss: 2.0119, Perplexity: 7.4776
Epoch [3/3], Step [856/1618], Loss: 1.8279, Perplexity: 6.2211
Epoch [3/3], Step [857/1618], Loss: 2.2834, Perplexity: 9.8098
Epoch [3/3], Step [858/1618], Loss: 2.0195, Perplexity: 7.5342
Epoch [3/3], Step [859/1618], Loss: 1.9805, Perplexity: 7.2462
Epoch [3/3], Step [860/1618], Loss: 1.9036, Perplexity: 6.7103
Epoch [3/3], Step [861/1618], Loss: 2.0230, Perplexity: 7.5613
Epoch [3/3], Step [862/1618], Loss: 1.9194, Perplexity: 6.8172
Epoch [3/3], Step [863/1618], Loss: 1.9192, Perplexity: 6.8157
Epoch [3/3], Step [864/1618], Loss: 1.8957, Perplexity: 6.6575
Epoch [3/3], Step [865/1618], Loss: 1.8750, Perplexity: 6.5206
Epoch [3/3], Step [866/1618], Loss: 1.9156, Perplexity: 6.7909
Epoch [3/3], Step [867/1618], Loss: 1.9679, Perplexity: 7.1554
Epoch [3/3], Step [868/1618], Loss: 1.9284, Perplexity: 6.8787
Epoch [3/3], Step [869/1618], Loss: 2.8759, Perplexity: 17.7416
Epoch [3/3], Step [870/1618], Loss: 1.9433, Perplexity: 6.9817
Epoch [3/3], Step [871/1618], Loss: 1.9298, Perplexity: 6.8880
Epoch [3/3], Step [872/1618], Loss: 1.9270, Perplexity: 6.8690
Epoch [3/3], Step [873/1618], Loss: 1.9190, Perplexity: 6.8144
Epoch [3/3], Step [874/1618], Loss: 1.9924, Perplexity: 7.3331
Epoch [3/3], Step [875/1618], Loss: 1.9105, Perplexity: 6.7568
Epoch [3/3], Step [876/1618], Loss: 1.9141, Perplexity: 6.7810
Epoch [3/3], Step [877/1618], Loss: 1.9030, Perplexity: 6.7059
Epoch [3/3], Step [878/1618], Loss: 1.9189, Perplexity: 6.8132
Epoch [3/3], Step [879/1618], Loss: 2.0888, Perplexity: 8.0751
Epoch [3/3], Step [880/1618], Loss: 1.9003, Perplexity: 6.6880
Epoch [3/3], Step [881/1618], Loss: 2.0596, Perplexity: 7.8432
Epoch [3/3], Step [882/1618], Loss: 1.8856, Perplexity: 6.5901
Epoch [3/3], Step [883/1618], Loss: 1.8010, Perplexity: 6.0557
Epoch [3/3], Step [884/1618], Loss: 1.9722, Perplexity: 7.1867
Epoch [3/3], Step [885/1618], Loss: 1.9702, Perplexity: 7.1721
Epoch [3/3], Step [886/1618], Loss: 1.8846, Perplexity: 6.5840
Epoch [3/3], Step [887/1618], Loss: 1.8813, Perplexity: 6.5618
Epoch [3/3], Step [888/1618], Loss: 1.9526, Perplexity: 7.0468
Epoch [3/3], Step [889/1618], Loss: 1.8350, Perplexity: 6.2649
Epoch [3/3], Step [890/1618], Loss: 1.9409, Perplexity: 6.9653
Epoch [3/3], Step [891/1618], Loss: 1.9163, Perplexity: 6.7955
Epoch [3/3], Step [892/1618], Loss: 2.1254, Perplexity: 8.3758
Epoch [3/3], Step [893/1618], Loss: 1.9928, Perplexity: 7.3362
Epoch [3/3], Step [894/1618], Loss: 1.9972, Perplexity: 7.3685
Epoch [3/3], Step [895/1618], Loss: 1.8120, Perplexity: 6.1227
Epoch [3/3], Step [896/1618], Loss: 2.0035, Perplexity: 7.4147
Epoch [3/3], Step [897/1618], Loss: 1.8117, Perplexity: 6.1206
Epoch [3/3], Step [898/1618], Loss: 2.2299, Perplexity: 9.2987
Epoch [3/3], Step [899/1618], Loss: 1.8619, Perplexity: 6.4356
Epoch [3/3], Step [900/1618], Loss: 1.8380, Perplexity: 6.2840
Epoch [3/3], Step [901/1618], Loss: 1.8889, Perplexity: 6.6124
Epoch [3/3], Step [902/1618], Loss: 1.8883, Perplexity: 6.6082
Epoch [3/3], Step [903/1618], Loss: 2.1883, Perplexity: 8.9202
Epoch [3/3], Step [904/1618], Loss: 1.9446, Perplexity: 6.9907
Epoch [3/3], Step [905/1618], Loss: 2.0001, Perplexity: 7.3897
Epoch [3/3], Step [906/1618], Loss: 1.9001, Perplexity: 6.6865
Epoch [3/3], Step [907/1618], Loss: 1.7786, Perplexity: 5.9213
Epoch [3/3], Step [908/1618], Loss: 2.1496, Perplexity: 8.5810
Epoch [3/3], Step [909/1618], Loss: 1.9381, Perplexity: 6.9456
Epoch [3/3], Step [910/1618], Loss: 2.0552, Perplexity: 7.8087
Epoch [3/3], Step [911/1618], Loss: 2.0092, Perplexity: 7.4575
Epoch [3/3], Step [912/1618], Loss: 1.8993, Perplexity: 6.6814
Epoch [3/3], Step [913/1618], Loss: 1.9379, Perplexity: 6.9439
Epoch [3/3], Step [914/1618], Loss: 1.9299, Perplexity: 6.8889
Epoch [3/3], Step [915/1618], Loss: 1.8745, Perplexity: 6.5175
Epoch [3/3], Step [916/1618], Loss: 2.0478, Perplexity: 7.7510
Epoch [3/3], Step [917/1618], Loss: 1.8886, Perplexity: 6.6104
Epoch [3/3], Step [918/1618], Loss: 1.8508, Perplexity: 6.3648
Epoch [3/3], Step [919/1618], Loss: 2.0683, Perplexity: 7.9112
Epoch [3/3], Step [920/1618], Loss: 1.9265, Perplexity: 6.8652
Epoch [3/3], Step [921/1618], Loss: 2.9013, Perplexity: 18.1986
Epoch [3/3], Step [922/1618], Loss: 1.9186, Perplexity: 6.8112
Epoch [3/3], Step [923/1618], Loss: 2.1214, Perplexity: 8.3426
Epoch [3/3], Step [924/1618], Loss: 2.0927, Perplexity: 8.1069
Epoch [3/3], Step [925/1618], Loss: 1.9364, Perplexity: 6.9335
Epoch [3/3], Step [926/1618], Loss: 2.1663, Perplexity: 8.7259
Epoch [3/3], Step [927/1618], Loss: 1.8553, Perplexity: 6.3938
Epoch [3/3], Step [928/1618], Loss: 1.9644, Perplexity: 7.1310
Epoch [3/3], Step [929/1618], Loss: 2.2614, Perplexity: 9.5969
Epoch [3/3], Step [930/1618], Loss: 2.1373, Perplexity: 8.4761
Epoch [3/3], Step [931/1618], Loss: 1.8625, Perplexity: 6.4395
Epoch [3/3], Step [932/1618], Loss: 1.8642, Perplexity: 6.4509
Epoch [3/3], Step [933/1618], Loss: 1.9985, Perplexity: 7.3780
Epoch [3/3], Step [934/1618], Loss: 2.0040, Perplexity: 7.4186
Epoch [3/3], Step [935/1618], Loss: 1.9238, Perplexity: 6.8467
Epoch [3/3], Step [936/1618], Loss: 1.9681, Perplexity: 7.1574
Epoch [3/3], Step [937/1618], Loss: 2.0344, Perplexity: 7.6477
Epoch [3/3], Step [938/1618], Loss: 2.0409, Perplexity: 7.6972
Epoch [3/3], Step [939/1618], Loss: 2.1428, Perplexity: 8.5235
Epoch [3/3], Step [940/1618], Loss: 1.9702, Perplexity: 7.1719
Epoch [3/3], Step [941/1618], Loss: 1.9169, Perplexity: 6.7999
Epoch [3/3], Step [942/1618], Loss: 1.9347, Perplexity: 6.9223
Epoch [3/3], Step [943/1618], Loss: 1.8753, Perplexity: 6.5230
Epoch [3/3], Step [944/1618], Loss: 2.0161, Perplexity: 7.5088
Epoch [3/3], Step [945/1618], Loss: 1.9392, Perplexity: 6.9533
Epoch [3/3], Step [946/1618], Loss: 1.9362, Perplexity: 6.9320
Epoch [3/3], Step [947/1618], Loss: 2.0498, Perplexity: 7.7662
Epoch [3/3], Step [948/1618], Loss: 1.8671, Perplexity: 6.4693
Epoch [3/3], Step [949/1618], Loss: 2.3595, Perplexity: 10.5856
Epoch [3/3], Step [950/1618], Loss: 1.9752, Perplexity: 7.2079
Epoch [3/3], Step [951/1618], Loss: 2.0088, Perplexity: 7.4541
Epoch [3/3], Step [952/1618], Loss: 1.9576, Perplexity: 7.0823
Epoch [3/3], Step [953/1618], Loss: 2.3654, Perplexity: 10.6478
Epoch [3/3], Step [954/1618], Loss: 1.9709, Perplexity: 7.1769
Epoch [3/3], Step [955/1618], Loss: 2.0004, Perplexity: 7.3922
Epoch [3/3], Step [956/1618], Loss: 2.0001, Perplexity: 7.3898
Epoch [3/3], Step [957/1618], Loss: 2.0007, Perplexity: 7.3941
Epoch [3/3], Step [958/1618], Loss: 2.1392, Perplexity: 8.4925
Epoch [3/3], Step [959/1618], Loss: 2.0048, Perplexity: 7.4247
Epoch [3/3], Step [960/1618], Loss: 1.8434, Perplexity: 6.3182
Epoch [3/3], Step [961/1618], Loss: 1.9435, Perplexity: 6.9832
Epoch [3/3], Step [962/1618], Loss: 1.9841, Perplexity: 7.2728
Epoch [3/3], Step [963/1618], Loss: 2.1352, Perplexity: 8.4586
Epoch [3/3], Step [964/1618], Loss: 1.9501, Perplexity: 7.0291
Epoch [3/3], Step [965/1618], Loss: 1.8704, Perplexity: 6.4907
Epoch [3/3], Step [966/1618], Loss: 1.9363, Perplexity: 6.9334
Epoch [3/3], Step [967/1618], Loss: 1.9232, Perplexity: 6.8431
Epoch [3/3], Step [968/1618], Loss: 2.3921, Perplexity: 10.9365
Epoch [3/3], Step [969/1618], Loss: 1.8463, Perplexity: 6.3363
Epoch [3/3], Step [970/1618], Loss: 2.0801, Perplexity: 8.0056
Epoch [3/3], Step [971/1618], Loss: 1.9954, Perplexity: 7.3549
Epoch [3/3], Step [972/1618], Loss: 2.0384, Perplexity: 7.6786
Epoch [3/3], Step [973/1618], Loss: 1.8556, Perplexity: 6.3953
Epoch [3/3], Step [974/1618], Loss: 1.9131, Perplexity: 6.7739
Epoch [3/3], Step [975/1618], Loss: 1.9717, Perplexity: 7.1829
Epoch [3/3], Step [976/1618], Loss: 1.9697, Perplexity: 7.1685
Epoch [3/3], Step [977/1618], Loss: 1.9608, Perplexity: 7.1047
Epoch [3/3], Step [978/1618], Loss: 1.9936, Perplexity: 7.3417
Epoch [3/3], Step [979/1618], Loss: 1.9713, Perplexity: 7.1797
Epoch [3/3], Step [980/1618], Loss: 1.8148, Perplexity: 6.1397
Epoch [3/3], Step [981/1618], Loss: 2.2382, Perplexity: 9.3762
Epoch [3/3], Step [982/1618], Loss: 2.3981, Perplexity: 11.0024
Epoch [3/3], Step [983/1618], Loss: 1.8354, Perplexity: 6.2678
Epoch [3/3], Step [984/1618], Loss: 1.9929, Perplexity: 7.3371
Epoch [3/3], Step [985/1618], Loss: 1.8078, Perplexity: 6.0970
Epoch [3/3], Step [986/1618], Loss: 1.9827, Perplexity: 7.2625
Epoch [3/3], Step [987/1618], Loss: 1.8500, Perplexity: 6.3595
Epoch [3/3], Step [988/1618], Loss: 2.1681, Perplexity: 8.7416
Epoch [3/3], Step [989/1618], Loss: 1.8033, Perplexity: 6.0699
Epoch [3/3], Step [990/1618], Loss: 1.9714, Perplexity: 7.1807
Epoch [3/3], Step [991/1618], Loss: 2.1428, Perplexity: 8.5236
Epoch [3/3], Step [992/1618], Loss: 1.8618, Perplexity: 6.4351
Epoch [3/3], Step [993/1618], Loss: 1.8637, Perplexity: 6.4474
Epoch [3/3], Step [994/1618], Loss: 2.4307, Perplexity: 11.3671
Epoch [3/3], Step [995/1618], Loss: 1.8820, Perplexity: 6.5665
Epoch [3/3], Step [996/1618], Loss: 1.8973, Perplexity: 6.6677
Epoch [3/3], Step [997/1618], Loss: 1.9474, Perplexity: 7.0106
Epoch [3/3], Step [998/1618], Loss: 1.8346, Perplexity: 6.2625
Epoch [3/3], Step [999/1618], Loss: 1.8639, Perplexity: 6.4488
Epoch [3/3], Step [1000/1618], Loss: 1.9974, Perplexity: 7.3699
Epoch [3/3], Step [1001/1618], Loss: 1.7958, Perplexity: 6.0241
Epoch [3/3], Step [1002/1618], Loss: 1.9523, Perplexity: 7.0449
Epoch [3/3], Step [1003/1618], Loss: 1.8981, Perplexity: 6.6734
Epoch [3/3], Step [1004/1618], Loss: 1.8202, Perplexity: 6.1728
Epoch [3/3], Step [1005/1618], Loss: 2.0877, Perplexity: 8.0662
Epoch [3/3], Step [1006/1618], Loss: 1.8588, Perplexity: 6.4163
Epoch [3/3], Step [1007/1618], Loss: 1.9060, Perplexity: 6.7264
Epoch [3/3], Step [1008/1618], Loss: 1.8357, Perplexity: 6.2694
Epoch [3/3], Step [1009/1618], Loss: 2.0039, Perplexity: 7.4182
Epoch [3/3], Step [1010/1618], Loss: 1.9035, Perplexity: 6.7093
Epoch [3/3], Step [1011/1618], Loss: 2.2236, Perplexity: 9.2404
Epoch [3/3], Step [1012/1618], Loss: 1.9654, Perplexity: 7.1381
Epoch [3/3], Step [1013/1618], Loss: 1.8195, Perplexity: 6.1685
Epoch [3/3], Step [1014/1618], Loss: 1.9673, Perplexity: 7.1514
Epoch [3/3], Step [1015/1618], Loss: 1.8106, Perplexity: 6.1141
Epoch [3/3], Step [1016/1618], Loss: 1.9349, Perplexity: 6.9236
Epoch [3/3], Step [1017/1618], Loss: 1.9882, Perplexity: 7.3026
Epoch [3/3], Step [1018/1618], Loss: 1.8802, Perplexity: 6.5547
Epoch [3/3], Step [1019/1618], Loss: 1.9014, Perplexity: 6.6954
Epoch [3/3], Step [1020/1618], Loss: 1.8538, Perplexity: 6.3842
Epoch [3/3], Step [1021/1618], Loss: 1.8480, Perplexity: 6.3472
Epoch [3/3], Step [1022/1618], Loss: 1.9695, Perplexity: 7.1671
Epoch [3/3], Step [1023/1618], Loss: 1.8394, Perplexity: 6.2925
Epoch [3/3], Step [1024/1618], Loss: 1.8670, Perplexity: 6.4687
Epoch [3/3], Step [1025/1618], Loss: 1.8882, Perplexity: 6.6072
Epoch [3/3], Step [1026/1618], Loss: 1.8463, Perplexity: 6.3365
Epoch [3/3], Step [1027/1618], Loss: 2.6852, Perplexity: 14.6609
Epoch [3/3], Step [1028/1618], Loss: 1.9795, Perplexity: 7.2391
Epoch [3/3], Step [1029/1618], Loss: 2.2888, Perplexity: 9.8631
Epoch [3/3], Step [1030/1618], Loss: 1.8786, Perplexity: 6.5446
Epoch [3/3], Step [1031/1618], Loss: 1.9796, Perplexity: 7.2396
Epoch [3/3], Step [1032/1618], Loss: 2.2393, Perplexity: 9.3866
Epoch [3/3], Step [1033/1618], Loss: 1.9231, Perplexity: 6.8418
Epoch [3/3], Step [1034/1618], Loss: 1.8376, Perplexity: 6.2815
Epoch [3/3], Step [1035/1618], Loss: 1.9370, Perplexity: 6.9378
Epoch [3/3], Step [1036/1618], Loss: 1.9038, Perplexity: 6.7113
Epoch [3/3], Step [1037/1618], Loss: 1.8986, Perplexity: 6.6766
Epoch [3/3], Step [1038/1618], Loss: 1.9056, Perplexity: 6.7236
Epoch [3/3], Step [1039/1618], Loss: 1.8839, Perplexity: 6.5789
Epoch [3/3], Step [1040/1618], Loss: 1.8076, Perplexity: 6.0957
Epoch [3/3], Step [1041/1618], Loss: 2.0089, Perplexity: 7.4548
Epoch [3/3], Step [1042/1618], Loss: 2.0805, Perplexity: 8.0084
Epoch [3/3], Step [1043/1618], Loss: 1.8411, Perplexity: 6.3036
Epoch [3/3], Step [1044/1618], Loss: 2.0676, Perplexity: 7.9057
Epoch [3/3], Step [1045/1618], Loss: 2.1814, Perplexity: 8.8590
Epoch [3/3], Step [1046/1618], Loss: 1.8342, Perplexity: 6.2604
Epoch [3/3], Step [1047/1618], Loss: 2.4226, Perplexity: 11.2755
Epoch [3/3], Step [1048/1618], Loss: 1.8401, Perplexity: 6.2970
Epoch [3/3], Step [1049/1618], Loss: 1.8653, Perplexity: 6.4578
Epoch [3/3], Step [1050/1618], Loss: 2.1577, Perplexity: 8.6510
Epoch [3/3], Step [1051/1618], Loss: 2.1928, Perplexity: 8.9602
Epoch [3/3], Step [1052/1618], Loss: 1.8467, Perplexity: 6.3387
Epoch [3/3], Step [1053/1618], Loss: 1.8382, Perplexity: 6.2850
Epoch [3/3], Step [1054/1618], Loss: 1.9317, Perplexity: 6.9010
Epoch [3/3], Step [1055/1618], Loss: 1.9323, Perplexity: 6.9057
Epoch [3/3], Step [1056/1618], Loss: 1.9892, Perplexity: 7.3098
Epoch [3/3], Step [1057/1618], Loss: 1.9135, Perplexity: 6.7766
Epoch [3/3], Step [1058/1618], Loss: 1.9547, Perplexity: 7.0618
Epoch [3/3], Step [1059/1618], Loss: 1.7961, Perplexity: 6.0263
Epoch [3/3], Step [1060/1618], Loss: 1.8660, Perplexity: 6.4627
Epoch [3/3], Step [1061/1618], Loss: 2.1215, Perplexity: 8.3438
Epoch [3/3], Step [1062/1618], Loss: 1.8408, Perplexity: 6.3017
Epoch [3/3], Step [1063/1618], Loss: 2.0097, Perplexity: 7.4612
Epoch [3/3], Step [1064/1618], Loss: 2.1201, Perplexity: 8.3317
Epoch [3/3], Step [1065/1618], Loss: 1.8880, Perplexity: 6.6059
Epoch [3/3], Step [1066/1618], Loss: 2.0161, Perplexity: 7.5092
Epoch [3/3], Step [1067/1618], Loss: 2.4204, Perplexity: 11.2504
Epoch [3/3], Step [1068/1618], Loss: 2.0073, Perplexity: 7.4428
Epoch [3/3], Step [1069/1618], Loss: 1.9684, Perplexity: 7.1595
Epoch [3/3], Step [1070/1618], Loss: 2.2380, Perplexity: 9.3744
Epoch [3/3], Step [1071/1618], Loss: 2.3435, Perplexity: 10.4171
Epoch [3/3], Step [1072/1618], Loss: 1.9443, Perplexity: 6.9887
Epoch [3/3], Step [1073/1618], Loss: 2.0887, Perplexity: 8.0742
Epoch [3/3], Step [1074/1618], Loss: 1.9148, Perplexity: 6.7855
Epoch [3/3], Step [1075/1618], Loss: 1.8547, Perplexity: 6.3901
Epoch [3/3], Step [1076/1618], Loss: 1.9398, Perplexity: 6.9572
Epoch [3/3], Step [1077/1618], Loss: 1.9768, Perplexity: 7.2194
Epoch [3/3], Step [1078/1618], Loss: 2.2400, Perplexity: 9.3933
Epoch [3/3], Step [1079/1618], Loss: 1.8892, Perplexity: 6.6143
Epoch [3/3], Step [1080/1618], Loss: 1.8805, Perplexity: 6.5568
Epoch [3/3], Step [1081/1618], Loss: 1.8858, Perplexity: 6.5914
Epoch [3/3], Step [1082/1618], Loss: 2.4230, Perplexity: 11.2799
Epoch [3/3], Step [1083/1618], Loss: 1.8915, Perplexity: 6.6293
Epoch [3/3], Step [1084/1618], Loss: 2.0197, Perplexity: 7.5361
Epoch [3/3], Step [1085/1618], Loss: 2.1076, Perplexity: 8.2286
Epoch [3/3], Step [1086/1618], Loss: 2.2508, Perplexity: 9.4954
Epoch [3/3], Step [1087/1618], Loss: 1.9368, Perplexity: 6.9367
Epoch [3/3], Step [1088/1618], Loss: 1.8618, Perplexity: 6.4354
Epoch [3/3], Step [1089/1618], Loss: 1.8039, Perplexity: 6.0731
Epoch [3/3], Step [1090/1618], Loss: 2.3278, Perplexity: 10.2550
Epoch [3/3], Step [1091/1618], Loss: 1.8051, Perplexity: 6.0807
Epoch [3/3], Step [1092/1618], Loss: 1.9219, Perplexity: 6.8336
Epoch [3/3], Step [1093/1618], Loss: 1.9230, Perplexity: 6.8414
Epoch [3/3], Step [1094/1618], Loss: 1.9645, Perplexity: 7.1312
Epoch [3/3], Step [1095/1618], Loss: 2.0572, Perplexity: 7.8237
Epoch [3/3], Step [1096/1618], Loss: 1.8347, Perplexity: 6.2633
Epoch [3/3], Step [1097/1618], Loss: 1.9633, Perplexity: 7.1230
Epoch [3/3], Step [1098/1618], Loss: 1.9134, Perplexity: 6.7759
Epoch [3/3], Step [1099/1618], Loss: 1.9471, Perplexity: 7.0083
Epoch [3/3], Step [1100/1618], Loss: 1.9371, Perplexity: 6.9388
Epoch [3/3], Step [1101/1618], Loss: 1.9783, Perplexity: 7.2307
Epoch [3/3], Step [1102/1618], Loss: 1.9407, Perplexity: 6.9640
Epoch [3/3], Step [1103/1618], Loss: 2.1285, Perplexity: 8.4020
Epoch [3/3], Step [1104/1618], Loss: 1.9577, Perplexity: 7.0827
Epoch [3/3], Step [1105/1618], Loss: 1.9375, Perplexity: 6.9411
Epoch [3/3], Step [1106/1618], Loss: 2.0655, Perplexity: 7.8892
Epoch [3/3], Step [1107/1618], Loss: 1.8382, Perplexity: 6.2855
Epoch [3/3], Step [1108/1618], Loss: 1.9617, Perplexity: 7.1113
Epoch [3/3], Step [1109/1618], Loss: 1.8276, Perplexity: 6.2187
Epoch [3/3], Step [1110/1618], Loss: 1.9228, Perplexity: 6.8403
Epoch [3/3], Step [1111/1618], Loss: 1.8774, Perplexity: 6.5365
Epoch [3/3], Step [1112/1618], Loss: 1.8965, Perplexity: 6.6627
Epoch [3/3], Step [1113/1618], Loss: 2.3185, Perplexity: 10.1600
Epoch [3/3], Step [1114/1618], Loss: 1.8863, Perplexity: 6.5948
Epoch [3/3], Step [1115/1618], Loss: 1.8275, Perplexity: 6.2183
Epoch [3/3], Step [1116/1618], Loss: 2.5491, Perplexity: 12.7952
Epoch [3/3], Step [1117/1618], Loss: 1.9479, Perplexity: 7.0137
Epoch [3/3], Step [1118/1618], Loss: 1.9565, Perplexity: 7.0743
Epoch [3/3], Step [1119/1618], Loss: 1.9115, Perplexity: 6.7631
Epoch [3/3], Step [1120/1618], Loss: 2.1213, Perplexity: 8.3417
Epoch [3/3], Step [1121/1618], Loss: 1.8405, Perplexity: 6.2994
Epoch [3/3], Step [1122/1618], Loss: 2.2200, Perplexity: 9.2077
Epoch [3/3], Step [1123/1618], Loss: 1.8966, Perplexity: 6.6634
Epoch [3/3], Step [1124/1618], Loss: 1.9100, Perplexity: 6.7530
Epoch [3/3], Step [1125/1618], Loss: 1.8973, Perplexity: 6.6680
Epoch [3/3], Step [1126/1618], Loss: 1.8932, Perplexity: 6.6409
Epoch [3/3], Step [1127/1618], Loss: 1.9013, Perplexity: 6.6943
Epoch [3/3], Step [1128/1618], Loss: 1.9436, Perplexity: 6.9839
Epoch [3/3], Step [1129/1618], Loss: 1.9189, Perplexity: 6.8132
Epoch [3/3], Step [1130/1618], Loss: 1.8905, Perplexity: 6.6229
Epoch [3/3], Step [1131/1618], Loss: 1.9773, Perplexity: 7.2233
Epoch [3/3], Step [1132/1618], Loss: 1.9644, Perplexity: 7.1303
Epoch [3/3], Step [1133/1618], Loss: 2.2655, Perplexity: 9.6358
Epoch [3/3], Step [1134/1618], Loss: 1.9500, Perplexity: 7.0288
Epoch [3/3], Step [1135/1618], Loss: 1.8714, Perplexity: 6.4977
Epoch [3/3], Step [1136/1618], Loss: 1.8811, Perplexity: 6.5609
Epoch [3/3], Step [1137/1618], Loss: 1.8755, Perplexity: 6.5242
Epoch [3/3], Step [1138/1618], Loss: 1.8967, Perplexity: 6.6637
Epoch [3/3], Step [1139/1618], Loss: 1.9054, Perplexity: 6.7220
Epoch [3/3], Step [1140/1618], Loss: 2.8686, Perplexity: 17.6123
Epoch [3/3], Step [1141/1618], Loss: 1.8942, Perplexity: 6.6472
Epoch [3/3], Step [1142/1618], Loss: 1.9111, Perplexity: 6.7605
Epoch [3/3], Step [1143/1618], Loss: 2.0079, Perplexity: 7.4479
Epoch [3/3], Step [1144/1618], Loss: 1.7895, Perplexity: 5.9866
Epoch [3/3], Step [1145/1618], Loss: 1.9970, Perplexity: 7.3670
Epoch [3/3], Step [1146/1618], Loss: 1.8547, Perplexity: 6.3897
Epoch [3/3], Step [1147/1618], Loss: 1.8875, Perplexity: 6.6026
Epoch [3/3], Step [1148/1618], Loss: 2.8389, Perplexity: 17.0966
Epoch [3/3], Step [1149/1618], Loss: 1.9191, Perplexity: 6.8146
Epoch [3/3], Step [1150/1618], Loss: 1.9486, Perplexity: 7.0190
Epoch [3/3], Step [1151/1618], Loss: 1.7624, Perplexity: 5.8265
Epoch [3/3], Step [1152/1618], Loss: 2.1021, Perplexity: 8.1831
Epoch [3/3], Step [1153/1618], Loss: 1.9374, Perplexity: 6.9410
Epoch [3/3], Step [1154/1618], Loss: 1.9075, Perplexity: 6.7364
Epoch [3/3], Step [1155/1618], Loss: 2.2179, Perplexity: 9.1881
Epoch [3/3], Step [1156/1618], Loss: 1.8658, Perplexity: 6.4612
Epoch [3/3], Step [1157/1618], Loss: 2.3344, Perplexity: 10.3230
Epoch [3/3], Step [1158/1618], Loss: 2.3413, Perplexity: 10.3951
Epoch [3/3], Step [1159/1618], Loss: 1.9721, Perplexity: 7.1856
Epoch [3/3], Step [1160/1618], Loss: 1.9038, Perplexity: 6.7115
Epoch [3/3], Step [1161/1618], Loss: 2.0134, Perplexity: 7.4888
Epoch [3/3], Step [1162/1618], Loss: 1.9145, Perplexity: 6.7836
Epoch [3/3], Step [1163/1618], Loss: 1.9062, Perplexity: 6.7275
Epoch [3/3], Step [1164/1618], Loss: 1.9804, Perplexity: 7.2458
Epoch [3/3], Step [1165/1618], Loss: 1.8363, Perplexity: 6.2736
Epoch [3/3], Step [1166/1618], Loss: 1.8947, Perplexity: 6.6508
Epoch [3/3], Step [1167/1618], Loss: 1.9454, Perplexity: 6.9961
Epoch [3/3], Step [1168/1618], Loss: 1.9335, Perplexity: 6.9140
Epoch [3/3], Step [1169/1618], Loss: 2.0026, Perplexity: 7.4084
Epoch [3/3], Step [1170/1618], Loss: 1.8317, Perplexity: 6.2445
Epoch [3/3], Step [1171/1618], Loss: 1.9010, Perplexity: 6.6929
Epoch [3/3], Step [1172/1618], Loss: 1.9929, Perplexity: 7.3371
Epoch [3/3], Step [1173/1618], Loss: 1.8293, Perplexity: 6.2296
Epoch [3/3], Step [1174/1618], Loss: 1.8715, Perplexity: 6.4979
Epoch [3/3], Step [1175/1618], Loss: 2.1221, Perplexity: 8.3486
Epoch [3/3], Step [1176/1618], Loss: 1.9851, Perplexity: 7.2801
Epoch [3/3], Step [1177/1618], Loss: 2.1051, Perplexity: 8.2076
Epoch [3/3], Step [1178/1618], Loss: 2.2175, Perplexity: 9.1844
Epoch [3/3], Step [1179/1618], Loss: 2.1457, Perplexity: 8.5482
Epoch [3/3], Step [1180/1618], Loss: 1.8083, Perplexity: 6.1001
Epoch [3/3], Step [1181/1618], Loss: 1.8457, Perplexity: 6.3326
Epoch [3/3], Step [1182/1618], Loss: 2.0328, Perplexity: 7.6358
Epoch [3/3], Step [1183/1618], Loss: 1.9108, Perplexity: 6.7585
Epoch [3/3], Step [1184/1618], Loss: 1.9450, Perplexity: 6.9933
Epoch [3/3], Step [1185/1618], Loss: 1.9460, Perplexity: 7.0007
Epoch [3/3], Step [1186/1618], Loss: 2.0017, Perplexity: 7.4016
Epoch [3/3], Step [1187/1618], Loss: 1.9337, Perplexity: 6.9152
Epoch [3/3], Step [1188/1618], Loss: 1.8947, Perplexity: 6.6502
Epoch [3/3], Step [1189/1618], Loss: 1.8415, Perplexity: 6.3057
Epoch [3/3], Step [1190/1618], Loss: 2.0038, Perplexity: 7.4171
Epoch [3/3], Step [1191/1618], Loss: 1.8952, Perplexity: 6.6538
Epoch [3/3], Step [1192/1618], Loss: 1.8702, Perplexity: 6.4893
Epoch [3/3], Step [1193/1618], Loss: 2.3856, Perplexity: 10.8660
Epoch [3/3], Step [1194/1618], Loss: 1.9016, Perplexity: 6.6966
Epoch [3/3], Step [1195/1618], Loss: 1.9970, Perplexity: 7.3668
Epoch [3/3], Step [1196/1618], Loss: 1.8750, Perplexity: 6.5210
Epoch [3/3], Step [1197/1618], Loss: 2.0605, Perplexity: 7.8497
Epoch [3/3], Step [1198/1618], Loss: 1.8037, Perplexity: 6.0722
Epoch [3/3], Step [1199/1618], Loss: 1.9007, Perplexity: 6.6905
Epoch [3/3], Step [1200/1618], Loss: 2.2666, Perplexity: 9.6468
Epoch [3/3], Step [1201/1618], Loss: 1.8786, Perplexity: 6.5441
Epoch [3/3], Step [1202/1618], Loss: 1.8866, Perplexity: 6.5967
Epoch [3/3], Step [1203/1618], Loss: 1.7731, Perplexity: 5.8889
Epoch [3/3], Step [1204/1618], Loss: 1.9098, Perplexity: 6.7520
Epoch [3/3], Step [1205/1618], Loss: 2.1815, Perplexity: 8.8594
Epoch [3/3], Step [1206/1618], Loss: 1.8941, Perplexity: 6.6463
Epoch [3/3], Step [1207/1618], Loss: 1.8916, Perplexity: 6.6300
Epoch [3/3], Step [1208/1618], Loss: 1.9212, Perplexity: 6.8295
Epoch [3/3], Step [1209/1618], Loss: 2.1933, Perplexity: 8.9645
Epoch [3/3], Step [1210/1618], Loss: 1.8782, Perplexity: 6.5416
Epoch [3/3], Step [1211/1618], Loss: 1.8661, Perplexity: 6.4630
Epoch [3/3], Step [1212/1618], Loss: 1.9315, Perplexity: 6.8999
Epoch [3/3], Step [1213/1618], Loss: 1.9186, Perplexity: 6.8117
Epoch [3/3], Step [1214/1618], Loss: 1.9211, Perplexity: 6.8288
Epoch [3/3], Step [1215/1618], Loss: 1.8205, Perplexity: 6.1752
Epoch [3/3], Step [1216/1618], Loss: 1.9184, Perplexity: 6.8101
Epoch [3/3], Step [1217/1618], Loss: 2.1883, Perplexity: 8.9198
Epoch [3/3], Step [1218/1618], Loss: 1.9633, Perplexity: 7.1230
Epoch [3/3], Step [1219/1618], Loss: 1.9621, Perplexity: 7.1141
Epoch [3/3], Step [1220/1618], Loss: 2.2179, Perplexity: 9.1877
Epoch [3/3], Step [1221/1618], Loss: 2.1988, Perplexity: 9.0142
Epoch [3/3], Step [1222/1618], Loss: 1.9805, Perplexity: 7.2461
Epoch [3/3], Step [1223/1618], Loss: 1.9421, Perplexity: 6.9733
Epoch [3/3], Step [1224/1618], Loss: 1.9180, Perplexity: 6.8076
Epoch [3/3], Step [1225/1618], Loss: 2.1389, Perplexity: 8.4899
Epoch [3/3], Step [1226/1618], Loss: 1.8245, Perplexity: 6.1999
Epoch [3/3], Step [1227/1618], Loss: 1.8949, Perplexity: 6.6518
Epoch [3/3], Step [1228/1618], Loss: 1.9311, Perplexity: 6.8970
Epoch [3/3], Step [1229/1618], Loss: 1.9813, Perplexity: 7.2520
Epoch [3/3], Step [1230/1618], Loss: 1.8844, Perplexity: 6.5826
Epoch [3/3], Step [1231/1618], Loss: 1.9362, Perplexity: 6.9326
Epoch [3/3], Step [1232/1618], Loss: 1.8801, Perplexity: 6.5543
Epoch [3/3], Step [1233/1618], Loss: 1.9249, Perplexity: 6.8547
Epoch [3/3], Step [1234/1618], Loss: 1.8772, Perplexity: 6.5351
Epoch [3/3], Step [1235/1618], Loss: 1.9846, Perplexity: 7.2762
Epoch [3/3], Step [1236/1618], Loss: 1.8364, Perplexity: 6.2742
Epoch [3/3], Step [1237/1618], Loss: 1.8780, Perplexity: 6.5406
Epoch [3/3], Step [1238/1618], Loss: 1.7796, Perplexity: 5.9272
Epoch [3/3], Step [1239/1618], Loss: 1.8957, Perplexity: 6.6571
Epoch [3/3], Step [1240/1618], Loss: 1.8743, Perplexity: 6.5160
Epoch [3/3], Step [1241/1618], Loss: 2.1734, Perplexity: 8.7877
Epoch [3/3], Step [1242/1618], Loss: 2.1483, Perplexity: 8.5706
Epoch [3/3], Step [1243/1618], Loss: 1.8971, Perplexity: 6.6667
Epoch [3/3], Step [1244/1618], Loss: 1.9341, Perplexity: 6.9177
Epoch [3/3], Step [1245/1618], Loss: 1.8351, Perplexity: 6.2659
Epoch [3/3], Step [1246/1618], Loss: 1.8172, Perplexity: 6.1543
Epoch [3/3], Step [1247/1618], Loss: 1.8651, Perplexity: 6.4567
Epoch [3/3], Step [1248/1618], Loss: 2.5150, Perplexity: 12.3667
Epoch [3/3], Step [1249/1618], Loss: 1.8892, Perplexity: 6.6143
Epoch [3/3], Step [1250/1618], Loss: 1.8362, Perplexity: 6.2727
Epoch [3/3], Step [1251/1618], Loss: 1.8598, Perplexity: 6.4222
Epoch [3/3], Step [1252/1618], Loss: 1.8204, Perplexity: 6.1746
Epoch [3/3], Step [1253/1618], Loss: 1.8679, Perplexity: 6.4749
Epoch [3/3], Step [1254/1618], Loss: 2.2166, Perplexity: 9.1763
Epoch [3/3], Step [1255/1618], Loss: 1.8674, Perplexity: 6.4715
Epoch [3/3], Step [1256/1618], Loss: 2.2805, Perplexity: 9.7813
Epoch [3/3], Step [1257/1618], Loss: 1.9274, Perplexity: 6.8718
Epoch [3/3], Step [1258/1618], Loss: 1.8133, Perplexity: 6.1308
Epoch [3/3], Step [1259/1618], Loss: 2.0899, Perplexity: 8.0843
Epoch [3/3], Step [1260/1618], Loss: 1.8658, Perplexity: 6.4608
Epoch [3/3], Step [1261/1618], Loss: 1.8277, Perplexity: 6.2197
Epoch [3/3], Step [1262/1618], Loss: 1.8690, Perplexity: 6.4815
Epoch [3/3], Step [1263/1618], Loss: 2.6530, Perplexity: 14.1970
Epoch [3/3], Step [1264/1618], Loss: 1.8233, Perplexity: 6.1924
Epoch [3/3], Step [1265/1618], Loss: 1.9239, Perplexity: 6.8477
Epoch [3/3], Step [1266/1618], Loss: 1.8623, Perplexity: 6.4387
Epoch [3/3], Step [1267/1618], Loss: 1.8590, Perplexity: 6.4174
Epoch [3/3], Step [1268/1618], Loss: 1.9639, Perplexity: 7.1272
Epoch [3/3], Step [1269/1618], Loss: 1.8479, Perplexity: 6.3463
Epoch [3/3], Step [1270/1618], Loss: 1.9651, Perplexity: 7.1355
Epoch [3/3], Step [1271/1618], Loss: 2.0056, Perplexity: 7.4306
Epoch [3/3], Step [1272/1618], Loss: 1.9113, Perplexity: 6.7618
Epoch [3/3], Step [1273/1618], Loss: 1.9243, Perplexity: 6.8501
Epoch [3/3], Step [1274/1618], Loss: 1.8996, Perplexity: 6.6833
Epoch [3/3], Step [1275/1618], Loss: 2.5793, Perplexity: 13.1874
Epoch [3/3], Step [1276/1618], Loss: 1.9145, Perplexity: 6.7837
Epoch [3/3], Step [1277/1618], Loss: 1.9220, Perplexity: 6.8348
Epoch [3/3], Step [1278/1618], Loss: 1.8362, Perplexity: 6.2728
Epoch [3/3], Step [1279/1618], Loss: 1.9495, Perplexity: 7.0251
Epoch [3/3], Step [1280/1618], Loss: 1.8159, Perplexity: 6.1464
Epoch [3/3], Step [1281/1618], Loss: 2.0668, Perplexity: 7.8995
Epoch [3/3], Step [1282/1618], Loss: 1.8911, Perplexity: 6.6264
Epoch [3/3], Step [1283/1618], Loss: 2.5683, Perplexity: 13.0441
Epoch [3/3], Step [1284/1618], Loss: 1.9237, Perplexity: 6.8459
Epoch [3/3], Step [1285/1618], Loss: 2.0963, Perplexity: 8.1364
Epoch [3/3], Step [1286/1618], Loss: 2.1456, Perplexity: 8.5473
Epoch [3/3], Step [1287/1618], Loss: 3.0497, Perplexity: 21.1091
Epoch [3/3], Step [1288/1618], Loss: 1.7925, Perplexity: 6.0043
Epoch [3/3], Step [1289/1618], Loss: 2.1483, Perplexity: 8.5701
Epoch [3/3], Step [1290/1618], Loss: 1.8990, Perplexity: 6.6795
Epoch [3/3], Step [1291/1618], Loss: 1.8722, Perplexity: 6.5025
Epoch [3/3], Step [1292/1618], Loss: 1.8610, Perplexity: 6.4304
Epoch [3/3], Step [1293/1618], Loss: 2.0022, Perplexity: 7.4054
Epoch [3/3], Step [1294/1618], Loss: 1.8912, Perplexity: 6.6272
Epoch [3/3], Step [1295/1618], Loss: 1.8220, Perplexity: 6.1843
Epoch [3/3], Step [1296/1618], Loss: 2.0179, Perplexity: 7.5222
Epoch [3/3], Step [1297/1618], Loss: 2.7497, Perplexity: 15.6373
Epoch [3/3], Step [1298/1618], Loss: 1.9122, Perplexity: 6.7677
Epoch [3/3], Step [1299/1618], Loss: 1.9228, Perplexity: 6.8399
Epoch [3/3], Step [1300/1618], Loss: 1.9543, Perplexity: 7.0592
Epoch [3/3], Step [1301/1618], Loss: 1.9065, Perplexity: 6.7293
Epoch [3/3], Step [1302/1618], Loss: 1.8744, Perplexity: 6.5171
Epoch [3/3], Step [1303/1618], Loss: 2.0500, Perplexity: 7.7683
Epoch [3/3], Step [1304/1618], Loss: 1.8927, Perplexity: 6.6374
Epoch [3/3], Step [1305/1618], Loss: 2.0992, Perplexity: 8.1597
Epoch [3/3], Step [1306/1618], Loss: 1.8947, Perplexity: 6.6508
Epoch [3/3], Step [1307/1618], Loss: 1.8701, Perplexity: 6.4888
Epoch [3/3], Step [1308/1618], Loss: 1.8298, Perplexity: 6.2324
Epoch [3/3], Step [1309/1618], Loss: 1.8685, Perplexity: 6.4788
Epoch [3/3], Step [1310/1618], Loss: 1.9450, Perplexity: 6.9933
Epoch [3/3], Step [1311/1618], Loss: 2.1198, Perplexity: 8.3293
Epoch [3/3], Step [1312/1618], Loss: 1.9644, Perplexity: 7.1304
Epoch [3/3], Step [1313/1618], Loss: 2.7801, Perplexity: 16.1206
Epoch [3/3], Step [1314/1618], Loss: 2.0693, Perplexity: 7.9194
Epoch [3/3], Step [1315/1618], Loss: 1.8480, Perplexity: 6.3469
Epoch [3/3], Step [1316/1618], Loss: 2.1294, Perplexity: 8.4095
Epoch [3/3], Step [1317/1618], Loss: 1.8884, Perplexity: 6.6088
Epoch [3/3], Step [1318/1618], Loss: 1.8455, Perplexity: 6.3311
Epoch [3/3], Step [1319/1618], Loss: 2.0487, Perplexity: 7.7581
Epoch [3/3], Step [1320/1618], Loss: 1.8627, Perplexity: 6.4412
Epoch [3/3], Step [1321/1618], Loss: 1.8967, Perplexity: 6.6638
Epoch [3/3], Step [1322/1618], Loss: 1.7829, Perplexity: 5.9472
Epoch [3/3], Step [1323/1618], Loss: 1.8928, Perplexity: 6.6379
Epoch [3/3], Step [1324/1618], Loss: 1.7851, Perplexity: 5.9600
Epoch [3/3], Step [1325/1618], Loss: 2.2042, Perplexity: 9.0629
Epoch [3/3], Step [1326/1618], Loss: 1.8794, Perplexity: 6.5493
Epoch [3/3], Step [1327/1618], Loss: 1.8081, Perplexity: 6.0989
Epoch [3/3], Step [1328/1618], Loss: 1.9426, Perplexity: 6.9766
Epoch [3/3], Step [1329/1618], Loss: 1.9361, Perplexity: 6.9320
Epoch [3/3], Step [1330/1618], Loss: 1.8592, Perplexity: 6.4189
Epoch [3/3], Step [1331/1618], Loss: 2.1370, Perplexity: 8.4737
Epoch [3/3], Step [1332/1618], Loss: 2.1692, Perplexity: 8.7511
Epoch [3/3], Step [1333/1618], Loss: 2.0811, Perplexity: 8.0135
Epoch [3/3], Step [1334/1618], Loss: 1.8683, Perplexity: 6.4772
Epoch [3/3], Step [1335/1618], Loss: 1.9000, Perplexity: 6.6859
Epoch [3/3], Step [1336/1618], Loss: 1.9039, Perplexity: 6.7121
Epoch [3/3], Step [1337/1618], Loss: 1.8506, Perplexity: 6.3638
Epoch [3/3], Step [1338/1618], Loss: 1.9602, Perplexity: 7.1006
Epoch [3/3], Step [1339/1618], Loss: 1.8270, Perplexity: 6.2154
Epoch [3/3], Step [1340/1618], Loss: 1.8685, Perplexity: 6.4789
Epoch [3/3], Step [1341/1618], Loss: 1.8401, Perplexity: 6.2974
Epoch [3/3], Step [1342/1618], Loss: 1.8490, Perplexity: 6.3537
Epoch [3/3], Step [1343/1618], Loss: 1.8503, Perplexity: 6.3620
Epoch [3/3], Step [1344/1618], Loss: 1.8058, Perplexity: 6.0849
Epoch [3/3], Step [1345/1618], Loss: 1.9003, Perplexity: 6.6878
Epoch [3/3], Step [1346/1618], Loss: 1.9416, Perplexity: 6.9699
Epoch [3/3], Step [1347/1618], Loss: 1.9617, Perplexity: 7.1116
Epoch [3/3], Step [1348/1618], Loss: 2.2752, Perplexity: 9.7300
Epoch [3/3], Step [1349/1618], Loss: 1.8053, Perplexity: 6.0816
Epoch [3/3], Step [1350/1618], Loss: 2.0072, Perplexity: 7.4423
Epoch [3/3], Step [1351/1618], Loss: 1.9326, Perplexity: 6.9073
Epoch [3/3], Step [1352/1618], Loss: 1.9121, Perplexity: 6.7674
Epoch [3/3], Step [1353/1618], Loss: 1.9598, Perplexity: 7.0981
Epoch [3/3], Step [1354/1618], Loss: 1.9171, Perplexity: 6.8012
Epoch [3/3], Step [1355/1618], Loss: 1.8946, Perplexity: 6.6500
Epoch [3/3], Step [1356/1618], Loss: 2.1163, Perplexity: 8.3001
Epoch [3/3], Step [1357/1618], Loss: 2.1467, Perplexity: 8.5568
Epoch [3/3], Step [1358/1618], Loss: 1.8674, Perplexity: 6.4715
Epoch [3/3], Step [1359/1618], Loss: 1.9082, Perplexity: 6.7412
Epoch [3/3], Step [1360/1618], Loss: 1.9435, Perplexity: 6.9834
Epoch [3/3], Step [1361/1618], Loss: 1.8986, Perplexity: 6.6763
Epoch [3/3], Step [1362/1618], Loss: 1.8009, Perplexity: 6.0553
Epoch [3/3], Step [1363/1618], Loss: 2.0374, Perplexity: 7.6703
Epoch [3/3], Step [1364/1618], Loss: 1.8572, Perplexity: 6.4061
Epoch [3/3], Step [1365/1618], Loss: 1.8434, Perplexity: 6.3178
Epoch [3/3], Step [1366/1618], Loss: 1.8610, Perplexity: 6.4304
Epoch [3/3], Step [1367/1618], Loss: 1.9066, Perplexity: 6.7299
Epoch [3/3], Step [1368/1618], Loss: 1.8197, Perplexity: 6.1699
Epoch [3/3], Step [1369/1618], Loss: 2.1178, Perplexity: 8.3130
Epoch [3/3], Step [1370/1618], Loss: 1.8736, Perplexity: 6.5115
Epoch [3/3], Step [1371/1618], Loss: 1.9355, Perplexity: 6.9275
Epoch [3/3], Step [1372/1618], Loss: 1.8966, Perplexity: 6.6629
Epoch [3/3], Step [1373/1618], Loss: 2.2384, Perplexity: 9.3786
Epoch [3/3], Step [1374/1618], Loss: 1.8056, Perplexity: 6.0838
Epoch [3/3], Step [1375/1618], Loss: 1.9263, Perplexity: 6.8643
Epoch [3/3], Step [1376/1618], Loss: 1.9181, Perplexity: 6.8079
Epoch [3/3], Step [1377/1618], Loss: 1.8157, Perplexity: 6.1451
Epoch [3/3], Step [1378/1618], Loss: 1.8897, Perplexity: 6.6177
Epoch [3/3], Step [1379/1618], Loss: 1.8570, Perplexity: 6.4047
Epoch [3/3], Step [1380/1618], Loss: 1.7666, Perplexity: 5.8510
Epoch [3/3], Step [1381/1618], Loss: 1.8225, Perplexity: 6.1873
Epoch [3/3], Step [1382/1618], Loss: 2.0914, Perplexity: 8.0963
Epoch [3/3], Step [1383/1618], Loss: 1.8510, Perplexity: 6.3661
Epoch [3/3], Step [1384/1618], Loss: 1.7973, Perplexity: 6.0333
Epoch [3/3], Step [1385/1618], Loss: 1.9549, Perplexity: 7.0634
Epoch [3/3], Step [1386/1618], Loss: 1.9961, Perplexity: 7.3602
Epoch [3/3], Step [1387/1618], Loss: 1.8408, Perplexity: 6.3014
Epoch [3/3], Step [1388/1618], Loss: 2.0559, Perplexity: 7.8140
Epoch [3/3], Step [1389/1618], Loss: 2.0049, Perplexity: 7.4250
Epoch [3/3], Step [1390/1618], Loss: 1.7913, Perplexity: 5.9970
Epoch [3/3], Step [1391/1618], Loss: 1.9384, Perplexity: 6.9479
Epoch [3/3], Step [1392/1618], Loss: 1.9320, Perplexity: 6.9035
Epoch [3/3], Step [1393/1618], Loss: 2.3358, Perplexity: 10.3375
Epoch [3/3], Step [1394/1618], Loss: 1.8274, Perplexity: 6.2179
Epoch [3/3], Step [1395/1618], Loss: 1.8585, Perplexity: 6.4140
Epoch [3/3], Step [1396/1618], Loss: 1.8666, Perplexity: 6.4660
Epoch [3/3], Step [1397/1618], Loss: 1.9041, Perplexity: 6.7131
Epoch [3/3], Step [1398/1618], Loss: 1.9420, Perplexity: 6.9729
Epoch [3/3], Step [1399/1618], Loss: 2.2304, Perplexity: 9.3038
Epoch [3/3], Step [1400/1618], Loss: 2.0088, Perplexity: 7.4543
Epoch [3/3], Step [1401/1618], Loss: 2.1921, Perplexity: 8.9543
Epoch [3/3], Step [1402/1618], Loss: 2.1553, Perplexity: 8.6307
Epoch [3/3], Step [1403/1618], Loss: 2.0747, Perplexity: 7.9624
Epoch [3/3], Step [1404/1618], Loss: 1.8368, Perplexity: 6.2764
Epoch [3/3], Step [1405/1618], Loss: 1.8969, Perplexity: 6.6652
Epoch [3/3], Step [1406/1618], Loss: 1.8902, Perplexity: 6.6210
Epoch [3/3], Step [1407/1618], Loss: 2.5607, Perplexity: 12.9446
Epoch [3/3], Step [1408/1618], Loss: 1.9115, Perplexity: 6.7633
Epoch [3/3], Step [1409/1618], Loss: 2.0992, Perplexity: 8.1592
Epoch [3/3], Step [1410/1618], Loss: 1.8701, Perplexity: 6.4890
Epoch [3/3], Step [1411/1618], Loss: 1.8295, Perplexity: 6.2309
Epoch [3/3], Step [1412/1618], Loss: 2.1539, Perplexity: 8.6184
Epoch [3/3], Step [1413/1618], Loss: 1.8261, Perplexity: 6.2094
Epoch [3/3], Step [1414/1618], Loss: 1.9986, Perplexity: 7.3785
Epoch [3/3], Step [1415/1618], Loss: 1.8052, Perplexity: 6.0812
Epoch [3/3], Step [1416/1618], Loss: 1.9563, Perplexity: 7.0733
Epoch [3/3], Step [1417/1618], Loss: 1.8366, Perplexity: 6.2750
Epoch [3/3], Step [1418/1618], Loss: 1.8587, Perplexity: 6.4151
Epoch [3/3], Step [1419/1618], Loss: 1.9000, Perplexity: 6.6858
Epoch [3/3], Step [1420/1618], Loss: 1.8223, Perplexity: 6.1863
Epoch [3/3], Step [1421/1618], Loss: 1.9068, Perplexity: 6.7314
Epoch [3/3], Step [1422/1618], Loss: 1.8412, Perplexity: 6.3038
Epoch [3/3], Step [1423/1618], Loss: 1.7863, Perplexity: 5.9675
Epoch [3/3], Step [1424/1618], Loss: 2.0154, Perplexity: 7.5035
Epoch [3/3], Step [1425/1618], Loss: 1.8434, Perplexity: 6.3180
Epoch [3/3], Step [1426/1618], Loss: 1.9990, Perplexity: 7.3814
Epoch [3/3], Step [1427/1618], Loss: 1.8981, Perplexity: 6.6735
Epoch [3/3], Step [1428/1618], Loss: 2.3280, Perplexity: 10.2576
Epoch [3/3], Step [1429/1618], Loss: 1.9464, Perplexity: 7.0037
Epoch [3/3], Step [1430/1618], Loss: 2.1427, Perplexity: 8.5220
Epoch [3/3], Step [1431/1618], Loss: 1.8112, Perplexity: 6.1180
Epoch [3/3], Step [1432/1618], Loss: 1.9925, Perplexity: 7.3340
Epoch [3/3], Step [1433/1618], Loss: 1.8571, Perplexity: 6.4050
Epoch [3/3], Step [1434/1618], Loss: 1.9395, Perplexity: 6.9554
Epoch [3/3], Step [1435/1618], Loss: 2.1074, Perplexity: 8.2266
Epoch [3/3], Step [1436/1618], Loss: 1.8912, Perplexity: 6.6274
Epoch [3/3], Step [1437/1618], Loss: 1.9284, Perplexity: 6.8783
Epoch [3/3], Step [1438/1618], Loss: 1.8515, Perplexity: 6.3696
Epoch [3/3], Step [1439/1618], Loss: 1.9541, Perplexity: 7.0577
Epoch [3/3], Step [1440/1618], Loss: 1.8853, Perplexity: 6.5882
Epoch [3/3], Step [1441/1618], Loss: 1.9115, Perplexity: 6.7629
Epoch [3/3], Step [1442/1618], Loss: 1.8270, Perplexity: 6.2153
Epoch [3/3], Step [1443/1618], Loss: 2.0968, Perplexity: 8.1397
Epoch [3/3], Step [1444/1618], Loss: 1.8609, Perplexity: 6.4293
Epoch [3/3], Step [1445/1618], Loss: 1.9026, Perplexity: 6.7036
Epoch [3/3], Step [1446/1618], Loss: 1.8635, Perplexity: 6.4464
Epoch [3/3], Step [1447/1618], Loss: 1.8917, Perplexity: 6.6304
Epoch [3/3], Step [1448/1618], Loss: 1.8471, Perplexity: 6.3417
Epoch [3/3], Step [1449/1618], Loss: 1.9302, Perplexity: 6.8908
Epoch [3/3], Step [1450/1618], Loss: 1.7429, Perplexity: 5.7141
Epoch [3/3], Step [1451/1618], Loss: 1.9246, Perplexity: 6.8525
Epoch [3/3], Step [1452/1618], Loss: 2.2170, Perplexity: 9.1795
Epoch [3/3], Step [1453/1618], Loss: 2.1526, Perplexity: 8.6072
Epoch [3/3], Step [1454/1618], Loss: 1.9096, Perplexity: 6.7506
Epoch [3/3], Step [1455/1618], Loss: 2.0147, Perplexity: 7.4985
Epoch [3/3], Step [1456/1618], Loss: 1.9033, Perplexity: 6.7081
Epoch [3/3], Step [1457/1618], Loss: 2.0040, Perplexity: 7.4185
Epoch [3/3], Step [1458/1618], Loss: 2.0184, Perplexity: 7.5263
Epoch [3/3], Step [1459/1618], Loss: 1.7995, Perplexity: 6.0466
Epoch [3/3], Step [1460/1618], Loss: 1.8229, Perplexity: 6.1897
Epoch [3/3], Step [1461/1618], Loss: 1.8793, Perplexity: 6.5491
Epoch [3/3], Step [1462/1618], Loss: 1.9058, Perplexity: 6.7249
Epoch [3/3], Step [1463/1618], Loss: 1.8803, Perplexity: 6.5556
Epoch [3/3], Step [1464/1618], Loss: 1.8146, Perplexity: 6.1387
Epoch [3/3], Step [1465/1618], Loss: 1.9092, Perplexity: 6.7475
Epoch [3/3], Step [1466/1618], Loss: 1.8671, Perplexity: 6.4692
Epoch [3/3], Step [1467/1618], Loss: 2.3448, Perplexity: 10.4310
Epoch [3/3], Step [1468/1618], Loss: 1.8678, Perplexity: 6.4740
Epoch [3/3], Step [1469/1618], Loss: 1.9230, Perplexity: 6.8417
Epoch [3/3], Step [1470/1618], Loss: 1.8751, Perplexity: 6.5218
Epoch [3/3], Step [1471/1618], Loss: 2.2258, Perplexity: 9.2609
Epoch [3/3], Step [1472/1618], Loss: 1.8414, Perplexity: 6.3054
Epoch [3/3], Step [1473/1618], Loss: 1.9250, Perplexity: 6.8553
Epoch [3/3], Step [1474/1618], Loss: 1.9550, Perplexity: 7.0640
Epoch [3/3], Step [1475/1618], Loss: 1.8329, Perplexity: 6.2522
Epoch [3/3], Step [1476/1618], Loss: 1.8924, Perplexity: 6.6352
Epoch [3/3], Step [1477/1618], Loss: 1.8485, Perplexity: 6.3504
Epoch [3/3], Step [1478/1618], Loss: 3.0402, Perplexity: 20.9103
Epoch [3/3], Step [1479/1618], Loss: 2.1196, Perplexity: 8.3278
Epoch [3/3], Step [1480/1618], Loss: 1.8964, Perplexity: 6.6617
Epoch [3/3], Step [1481/1618], Loss: 1.9538, Perplexity: 7.0552
Epoch [3/3], Step [1482/1618], Loss: 1.9217, Perplexity: 6.8327
Epoch [3/3], Step [1483/1618], Loss: 1.8743, Perplexity: 6.5165
Epoch [3/3], Step [1484/1618], Loss: 2.0632, Perplexity: 7.8712
Epoch [3/3], Step [1485/1618], Loss: 1.9047, Perplexity: 6.7176
Epoch [3/3], Step [1486/1618], Loss: 1.8299, Perplexity: 6.2331
Epoch [3/3], Step [1487/1618], Loss: 1.8400, Perplexity: 6.2963
Epoch [3/3], Step [1488/1618], Loss: 2.4845, Perplexity: 11.9950
Epoch [3/3], Step [1489/1618], Loss: 1.9708, Perplexity: 7.1762
Epoch [3/3], Step [1490/1618], Loss: 1.8385, Perplexity: 6.2871
Epoch [3/3], Step [1491/1618], Loss: 1.9344, Perplexity: 6.9202
Epoch [3/3], Step [1492/1618], Loss: 1.9056, Perplexity: 6.7232
Epoch [3/3], Step [1493/1618], Loss: 1.9169, Perplexity: 6.7999
Epoch [3/3], Step [1494/1618], Loss: 1.9123, Perplexity: 6.7683
Epoch [3/3], Step [1495/1618], Loss: 1.9309, Perplexity: 6.8954
Epoch [3/3], Step [1496/1618], Loss: 1.8151, Perplexity: 6.1415
Epoch [3/3], Step [1497/1618], Loss: 2.2103, Perplexity: 9.1182
Epoch [3/3], Step [1498/1618], Loss: 1.9267, Perplexity: 6.8666
Epoch [3/3], Step [1499/1618], Loss: 1.8251, Perplexity: 6.2034
Epoch [3/3], Step [1500/1618], Loss: 2.1419, Perplexity: 8.5157
Epoch [3/3], Step [1501/1618], Loss: 1.7946, Perplexity: 6.0170
Epoch [3/3], Step [1502/1618], Loss: 2.0294, Perplexity: 7.6093
Epoch [3/3], Step [1503/1618], Loss: 1.8717, Perplexity: 6.4993
Epoch [3/3], Step [1504/1618], Loss: 2.0179, Perplexity: 7.5223
Epoch [3/3], Step [1505/1618], Loss: 2.0177, Perplexity: 7.5209
Epoch [3/3], Step [1506/1618], Loss: 1.7875, Perplexity: 5.9746
Epoch [3/3], Step [1507/1618], Loss: 1.8865, Perplexity: 6.5963
Epoch [3/3], Step [1508/1618], Loss: 1.8553, Perplexity: 6.3936
Epoch [3/3], Step [1509/1618], Loss: 1.9672, Perplexity: 7.1503
Epoch [3/3], Step [1510/1618], Loss: 1.9319, Perplexity: 6.9024
Epoch [3/3], Step [1511/1618], Loss: 1.9101, Perplexity: 6.7541
Epoch [3/3], Step [1512/1618], Loss: 1.7706, Perplexity: 5.8745
Epoch [3/3], Step [1513/1618], Loss: 2.5923, Perplexity: 13.3605
Epoch [3/3], Step [1514/1618], Loss: 1.9345, Perplexity: 6.9206
Epoch [3/3], Step [1515/1618], Loss: 2.1569, Perplexity: 8.6439
Epoch [3/3], Step [1516/1618], Loss: 1.8384, Perplexity: 6.2866
Epoch [3/3], Step [1517/1618], Loss: 1.8352, Perplexity: 6.2666
Epoch [3/3], Step [1518/1618], Loss: 2.2758, Perplexity: 9.7358
Epoch [3/3], Step [1519/1618], Loss: 1.8823, Perplexity: 6.5686
Epoch [3/3], Step [1520/1618], Loss: 1.8773, Perplexity: 6.5360
Epoch [3/3], Step [1521/1618], Loss: 1.8679, Perplexity: 6.4749
Epoch [3/3], Step [1522/1618], Loss: 2.0596, Perplexity: 7.8429
Epoch [3/3], Step [1523/1618], Loss: 1.9518, Perplexity: 7.0412
Epoch [3/3], Step [1524/1618], Loss: 1.8428, Perplexity: 6.3141
Epoch [3/3], Step [1525/1618], Loss: 1.8193, Perplexity: 6.1673
Epoch [3/3], Step [1526/1618], Loss: 1.8340, Perplexity: 6.2590
Epoch [3/3], Step [1527/1618], Loss: 1.8346, Perplexity: 6.2627
Epoch [3/3], Step [1528/1618], Loss: 1.9539, Perplexity: 7.0558
Epoch [3/3], Step [1529/1618], Loss: 1.8735, Perplexity: 6.5110
Epoch [3/3], Step [1530/1618], Loss: 1.8623, Perplexity: 6.4387
Epoch [3/3], Step [1531/1618], Loss: 2.0638, Perplexity: 7.8762
Epoch [3/3], Step [1532/1618], Loss: 2.0979, Perplexity: 8.1490
Epoch [3/3], Step [1533/1618], Loss: 1.9425, Perplexity: 6.9764
Epoch [3/3], Step [1534/1618], Loss: 1.9513, Perplexity: 7.0376
Epoch [3/3], Step [1535/1618], Loss: 2.1198, Perplexity: 8.3291
Epoch [3/3], Step [1536/1618], Loss: 1.8768, Perplexity: 6.5326
Epoch [3/3], Step [1537/1618], Loss: 1.8095, Perplexity: 6.1075
Epoch [3/3], Step [1538/1618], Loss: 1.9541, Perplexity: 7.0579
Epoch [3/3], Step [1539/1618], Loss: 1.9486, Perplexity: 7.0191
Epoch [3/3], Step [1540/1618], Loss: 1.9593, Perplexity: 7.0944
Epoch [3/3], Step [1541/1618], Loss: 1.8444, Perplexity: 6.3246
Epoch [3/3], Step [1542/1618], Loss: 2.0523, Perplexity: 7.7858
Epoch [3/3], Step [1543/1618], Loss: 1.8500, Perplexity: 6.3599
Epoch [3/3], Step [1544/1618], Loss: 1.8688, Perplexity: 6.4808
Epoch [3/3], Step [1545/1618], Loss: 2.4214, Perplexity: 11.2621
Epoch [3/3], Step [1546/1618], Loss: 2.3811, Perplexity: 10.8166
Epoch [3/3], Step [1547/1618], Loss: 1.9731, Perplexity: 7.1931
Epoch [3/3], Step [1548/1618], Loss: 1.8903, Perplexity: 6.6214
Epoch [3/3], Step [1549/1618], Loss: 1.9080, Perplexity: 6.7397
Epoch [3/3], Step [1550/1618], Loss: 1.7309, Perplexity: 5.6459
Epoch [3/3], Step [1551/1618], Loss: 1.9451, Perplexity: 6.9941
Epoch [3/3], Step [1552/1618], Loss: 1.8916, Perplexity: 6.6302
Epoch [3/3], Step [1553/1618], Loss: 1.8622, Perplexity: 6.4376
Epoch [3/3], Step [1554/1618], Loss: 1.8715, Perplexity: 6.4979
Epoch [3/3], Step [1555/1618], Loss: 1.9241, Perplexity: 6.8489
Epoch [3/3], Step [1556/1618], Loss: 1.9384, Perplexity: 6.9479
Epoch [3/3], Step [1557/1618], Loss: 1.8850, Perplexity: 6.5863
Epoch [3/3], Step [1558/1618], Loss: 1.9623, Perplexity: 7.1156
Epoch [3/3], Step [1559/1618], Loss: 2.1214, Perplexity: 8.3425
Epoch [3/3], Step [1560/1618], Loss: 1.8278, Perplexity: 6.2204
Epoch [3/3], Step [1561/1618], Loss: 1.8767, Perplexity: 6.5322
Epoch [3/3], Step [1562/1618], Loss: 1.9955, Perplexity: 7.3557
Epoch [3/3], Step [1563/1618], Loss: 2.1331, Perplexity: 8.4414
Epoch [3/3], Step [1564/1618], Loss: 2.1154, Perplexity: 8.2930
Epoch [3/3], Step [1565/1618], Loss: 1.8319, Perplexity: 6.2459
Epoch [3/3], Step [1566/1618], Loss: 1.9127, Perplexity: 6.7711
Epoch [3/3], Step [1567/1618], Loss: 1.8842, Perplexity: 6.5809
Epoch [3/3], Step [1568/1618], Loss: 1.9416, Perplexity: 6.9696
Epoch [3/3], Step [1569/1618], Loss: 1.8842, Perplexity: 6.5814
Epoch [3/3], Step [1570/1618], Loss: 1.8293, Perplexity: 6.2294
Epoch [3/3], Step [1571/1618], Loss: 1.8719, Perplexity: 6.5007
Epoch [3/3], Step [1572/1618], Loss: 1.8456, Perplexity: 6.3320
Epoch [3/3], Step [1573/1618], Loss: 1.9305, Perplexity: 6.8932
Epoch [3/3], Step [1574/1618], Loss: 1.8536, Perplexity: 6.3827
Epoch [3/3], Step [1575/1618], Loss: 1.9018, Perplexity: 6.6979
Epoch [3/3], Step [1576/1618], Loss: 1.9064, Perplexity: 6.7289
Epoch [3/3], Step [1577/1618], Loss: 1.8156, Perplexity: 6.1449
Epoch [3/3], Step [1578/1618], Loss: 1.9714, Perplexity: 7.1809
Epoch [3/3], Step [1579/1618], Loss: 1.9470, Perplexity: 7.0077
Epoch [3/3], Step [1580/1618], Loss: 1.8389, Perplexity: 6.2895
Epoch [3/3], Step [1581/1618], Loss: 2.0437, Perplexity: 7.7194
Epoch [3/3], Step [1582/1618], Loss: 1.8857, Perplexity: 6.5909
Epoch [3/3], Step [1583/1618], Loss: 1.9281, Perplexity: 6.8765
Epoch [3/3], Step [1584/1618], Loss: 1.8176, Perplexity: 6.1573
Epoch [3/3], Step [1585/1618], Loss: 1.9672, Perplexity: 7.1504
Epoch [3/3], Step [1586/1618], Loss: 1.8608, Perplexity: 6.4288
Epoch [3/3], Step [1587/1618], Loss: 1.8102, Perplexity: 6.1119
Epoch [3/3], Step [1588/1618], Loss: 3.0731, Perplexity: 21.6082
Epoch [3/3], Step [1589/1618], Loss: 2.0399, Perplexity: 7.6898
Epoch [3/3], Step [1590/1618], Loss: 1.9420, Perplexity: 6.9727
Epoch [3/3], Step [1591/1618], Loss: 1.9761, Perplexity: 7.2145
Epoch [3/3], Step [1592/1618], Loss: 2.1163, Perplexity: 8.3004
Epoch [3/3], Step [1593/1618], Loss: 1.9075, Perplexity: 6.7362
Epoch [3/3], Step [1594/1618], Loss: 2.0069, Perplexity: 7.4400
Epoch [3/3], Step [1595/1618], Loss: 1.8525, Perplexity: 6.3757
Epoch [3/3], Step [1596/1618], Loss: 1.9605, Perplexity: 7.1026
Epoch [3/3], Step [1597/1618], Loss: 1.9212, Perplexity: 6.8294
Epoch [3/3], Step [1598/1618], Loss: 1.8469, Perplexity: 6.3402
Epoch [3/3], Step [1599/1618], Loss: 1.8503, Perplexity: 6.3615
Epoch [3/3], Step [1600/1618], Loss: 1.8090, Perplexity: 6.1043
Epoch [3/3], Step [1601/1618], Loss: 2.2747, Perplexity: 9.7253
Epoch [3/3], Step [1602/1618], Loss: 1.8380, Perplexity: 6.2838
Epoch [3/3], Step [1603/1618], Loss: 1.8211, Perplexity: 6.1784
Epoch [3/3], Step [1604/1618], Loss: 1.8997, Perplexity: 6.6842
Epoch [3/3], Step [1605/1618], Loss: 1.8941, Perplexity: 6.6466
Epoch [3/3], Step [1606/1618], Loss: 2.6816, Perplexity: 14.6089
Epoch [3/3], Step [1607/1618], Loss: 1.8411, Perplexity: 6.3036
Epoch [3/3], Step [1608/1618], Loss: 2.1123, Perplexity: 8.2674
Epoch [3/3], Step [1609/1618], Loss: 1.9394, Perplexity: 6.9547
Epoch [3/3], Step [1610/1618], Loss: 1.7417, Perplexity: 5.7071
Epoch [3/3], Step [1611/1618], Loss: 1.9807, Perplexity: 7.2480
Epoch [3/3], Step [1612/1618], Loss: 1.8845, Perplexity: 6.5834
Epoch [3/3], Step [1613/1618], Loss: 1.7533, Perplexity: 5.7739
Epoch [3/3], Step [1614/1618], Loss: 1.9182, Perplexity: 6.8084
Epoch [3/3], Step [1615/1618], Loss: 2.0045, Perplexity: 7.4221
Epoch [3/3], Step [1616/1618], Loss: 1.8997, Perplexity: 6.6836
Epoch [3/3], Step [1617/1618], Loss: 1.8276, Perplexity: 6.2192
Epoch [3/3], Step [1618/1618], Loss: 1.9238, Perplexity: 6.8470
	
~~~~