In [0]:
#### Author: Arman Kabiri
#### Date: Feb. 18, 2020
#### Email: Arman.Kabiri94@gmail.com

In [2]:
# Mount Google Drive at /gdrive. This only works inside Google Colab and
# prompts for an authorization code the first time it runs.
from google.colab import drive
drive.mount('/gdrive')


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive


In [3]:
import os
# Work from the project folder on the mounted Drive so that the relative
# 'Data/...' paths used by Args resolve (presumably the local modules imported
# further down live here as well -- confirm).
os.chdir('/gdrive/My Drive/NLP_Stuff/My_Language_Model')
# Print the working directory to confirm that the change took effect.
!pwd

/gdrive/My Drive/NLP_Stuff/My_Language_Model


In [0]:
import argparse
import math
import os.path as path

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

from CorpusReader import CorpusReader
from Dictionary import Dictionary
from EmbeddingsLoader import EmbeddingsLoader
from Lang_Model import LanguageModel

In [0]:
class Args:
    """Run configuration read by ``main()`` and ``train()``.

    Every setting is a plain class attribute, so ``Args()`` needs no
    constructor arguments. The attribute names are part of the interface used
    by the training code and must not be renamed.
    """

    # --- File locations (relative to the current working directory) ---
    corpus_train_file = 'Data/corpus-test.txt'
    corpus_valid_file = ''  # not read by the training code in this notebook
    embeddings_file = 'Data/English_Wiki_1Billion_embeddings.bin'
    output_model_path = 'Data/model.bin'
    output_id2word_path = 'Data/id2word.txt'
    output_word2id_path = 'Data/word2id.txt'

    # --- Model settings, forwarded to LanguageModel(...) in main() ---
    n_layers = 2
    hidden_size = 300
    embeddings_dim = 300
    dropout_probablity = 0.25
    bidirectional_model = False
    tie_weights = False
    freez_embeddings = False

    # --- Optimisation settings ---
    batch_size = 50
    seq_len = 20
    epochs = 2
    lr = 0.001        # learning rate handed to the Adam optimizer
    clip_grad = 5     # max_norm used for gradient-norm clipping
    seed = 120        # passed to torch.manual_seed

    # --- Runtime settings ---
    print_steps = 50  # report loss / perplexity every this many steps
    gpu = True


# Single shared configuration instance used by the rest of the notebook.
args = Args()

In [6]:
# Check whether PyTorch can see a CUDA device before training with gpu=True.
torch.cuda.is_available()

True

In [7]:
def main():
    """Train the language model end to end using the module-level ``args``.

    Steps, in order: validate the GPU setting, seed PyTorch, build the
    vocabulary from the training corpus and save it, load pretrained
    embeddings when ``args.embeddings_file`` is not ``None``, build the
    ``LanguageModel``, print its parameter counts, then train for
    ``args.epochs`` epochs and save the model after every epoch.

    Raises:
        SystemExit: with status 1 when ``args.gpu`` is set but no CUDA device
            is available.
    """
    torch.set_num_threads(8)

    if torch.cuda.is_available():
        if not args.gpu:
            print("WARNING: You have a CUDA device, so you should probably run with --gpu")
    elif args.gpu:
        print("You do not have a GPU device, so you should run CPU without --gpu option.")
        # Raise instead of calling the interactive ``exit()`` helper: inside a
        # notebook kernel ``exit()`` does not stop the running cell, and as a
        # script it reported success (status 0) for what is an error.
        raise SystemExit(1)

    torch.manual_seed(args.seed)
    corpus_train_reader = CorpusReader(args.corpus_train_file, 100000000)  # 100MB

    print("Generating Dictionaries...")
    dictionary = Dictionary()
    dictionary.build_dictionary(corpus_train_reader)

    print("Saving Dictionary...")
    save_dictionary(dictionary, args.output_id2word_path, args.output_word2id_path)

    print("Loading Embeddings...")
    # Stays None when no embeddings file is configured; it is passed to the
    # model as ``pret_emb_matrix`` either way.
    embeddings_matrix = None
    if args.embeddings_file is not None:
        emb_loader = EmbeddingsLoader()
        embeddings_matrix = emb_loader.get_embeddings_matrix(args.embeddings_file, dictionary, args.embeddings_dim)

    model = LanguageModel(n_layers=args.n_layers, hidden_size=args.hidden_size, n_vocab=dictionary.get_dic_size(),
                          input_size=args.embeddings_dim, dropout_prob=args.dropout_probablity,
                          bidirectional=args.bidirectional_model, pret_emb_matrix=embeddings_matrix,
                          freez_emb=args.freez_embeddings, tie_weights=args.tie_weights, use_gpu=args.gpu)

    # Report the element count of every parameter tensor, then the total.
    param_sizes = [int(p.numel()) for p in model.parameters()]
    print(param_sizes)
    print(sum(param_sizes))

    # Put the model into train mode and move it to the GPU before the
    # optimizer is created from its parameters.
    model.train()
    if args.gpu:
        model.cuda()

    # Optimizer and Loss
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss()

    print("Training starts ...")
    for epoch in range(1, args.epochs + 1):
        print(f"Epoch {epoch}:")
        train(corpus_train_reader, dictionary, model, optimizer, criterion, args)
        # The same output path is used every epoch.
        print(f"Saving Model at epoch {epoch}...")
        model.save_model(args.output_model_path)


def train(corpus_train_reader, dictionary, model, optimizer, criterion, args):
    """Run one pass over the training batches, updating ``model`` in place.

    Args:
        corpus_train_reader: provides ``batchify(dictionary, batch_size,
            seq_len)``, which is iterated for ``(x, y)`` pairs of NumPy arrays.
        dictionary: vocabulary object; only ``get_dic_size()`` is used here, as
            the size of the last dimension of the flattened predictions.
        model: network called as ``model(x, hidden)`` and returning
            ``(y_hat, hidden)``; must also provide ``init_hidden(batch_size)``.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(predictions, targets)``.
        args: settings object; reads ``batch_size``, ``seq_len``, ``gpu``,
            ``clip_grad`` and ``print_steps``.
    """
    batch_generator = corpus_train_reader.batchify(dictionary, args.batch_size, args.seq_len)
    hidden = model.init_hidden(args.batch_size)

    for step, (x, y) in enumerate(tqdm(batch_generator), start=1):
        x = torch.from_numpy(x)
        y = torch.from_numpy(y)

        if args.gpu:
            x = x.cuda()
            y = y.cuda()

        # Carry the hidden state over from the previous batch, but cut it out
        # of the autograd graph so backpropagation stops at the batch boundary.
        hidden = detach_hidden(hidden)
        model.zero_grad()

        # Call the modules themselves rather than ``.forward`` so that any
        # registered nn.Module hooks run.
        y_hat, hidden = model(x, hidden)

        # Flatten the targets with -1 instead of batch_size * seq_len so a
        # batch with a different number of tokens does not break the reshape.
        loss = criterion(y_hat.view(-1, dictionary.get_dic_size()),
                         y.reshape(-1).long())
        loss.backward()

        nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.clip_grad)

        optimizer.step()

        if step % args.print_steps == 0:
            # Convert to a Python float once; perplexity is exp(loss).
            loss_value = loss.item()
            print(f"Step {step},     Loss = {loss_value},    PPL = {math.exp(loss_value)}")


def detach_hidden(hidden: tuple):
    """Return a new tuple holding ``state.detach()`` for every state in ``hidden``.

    The input is iterated once and is not modified; the order of the states is
    preserved.
    """
    detached_states = []
    for state in hidden:
        detached_states.append(state.detach())
    return tuple(detached_states)


def save_dictionary(dictionary: Dictionary, output_id2word_path, output_word2id_path):
    """Write the vocabulary mappings of ``dictionary`` to two UTF-8 text files.

    Args:
        dictionary: object exposing ``word2id`` (a mapping from word to id) and
            ``id2word`` (an iterable of words).
        output_id2word_path: destination file; receives one word per line, in
            the iteration order of ``dictionary.id2word``.
        output_word2id_path: destination file; receives one
            ``word<TAB>id`` line per entry of ``dictionary.word2id``.

    Raises:
        ValueError: if a word contains a tab character, which is the field
            separator of the word2id file. Nothing is written in that case.
    """
    # Validate before opening any file so a bad entry can never leave a
    # half-written dictionary behind. The previous bare ``exit()`` gave no
    # message and does not stop a running notebook cell.
    for word in dictionary.word2id:
        if '\t' in word:
            raise ValueError(
                f"Cannot save dictionary: word {word!r} contains a tab character, "
                "which is used as the field separator in the word2id file."
            )

    # An explicit encoding keeps the files identical across platforms instead
    # of depending on the locale default.
    with open(output_word2id_path, 'w', encoding='utf-8') as file:
        file.writelines(f"{word}\t{word_id}\n" for word, word_id in dictionary.word2id.items())

    with open(output_id2word_path, 'w', encoding='utf-8') as file:
        file.writelines(f"{word}\n" for word in dictionary.id2word)


# Start training when this code runs as the main module, which is the case
# both for a script run and for executing this notebook cell.
if __name__ == '__main__':
    main()


0it [00:00, ?it/s]

Generating Dictionaries...
Building dictionaries...


2it [00:08,  4.29s/it]


Dictionaries are built - Vocab size is 254732
Saving Dictionary...
Loading Embeddings...
Loading pretrained embeddings...


  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


Pretrained embeddings are loaded.
[76419600, 360000, 360000, 1200, 1200, 360000, 360000, 1200, 1200, 76419600, 254732]
154538732


0it [00:00, ?it/s]

Training starts ...
Epoch 1:


51it [00:15,  6.99it/s]

Step 50,     Loss = 8.224552154541016,    PPL = 3731.4499160295786


101it [00:22,  7.01it/s]

Step 100,     Loss = 7.708901405334473,    PPL = 2228.0931418989517


151it [00:29,  6.99it/s]

Step 150,     Loss = 7.7591729164123535,    PPL = 2342.965974977437


201it [00:37,  7.00it/s]

Step 200,     Loss = 7.695934772491455,    PPL = 2199.3887778422272


251it [00:44,  6.98it/s]

Step 250,     Loss = 7.820855140686035,    PPL = 2492.035538113899


301it [00:51,  6.99it/s]

Step 300,     Loss = 7.749845504760742,    PPL = 2321.2137704305296


351it [00:58,  7.00it/s]

Step 350,     Loss = 7.473904132843018,    PPL = 1761.470292661736


401it [01:05,  7.03it/s]

Step 400,     Loss = 7.691178798675537,    PPL = 2188.9533773197104


451it [01:13,  6.99it/s]

Step 450,     Loss = 7.591791152954102,    PPL = 1981.8601511772413


501it [01:20,  7.01it/s]

Step 500,     Loss = 7.4439697265625,    PPL = 1709.5231082936173


551it [01:27,  7.02it/s]

Step 550,     Loss = 7.537919044494629,    PPL = 1877.9180923024444


601it [01:34,  6.98it/s]

Step 600,     Loss = 7.412885665893555,    PPL = 1657.2015837707352


651it [01:41,  7.01it/s]

Step 650,     Loss = 7.313813209533691,    PPL = 1500.889497296015


701it [01:49,  6.99it/s]

Step 700,     Loss = 7.395553112030029,    PPL = 1628.7255422074343


751it [01:56,  7.01it/s]

Step 750,     Loss = 7.537692546844482,    PPL = 1877.4927964334458


801it [02:03,  6.99it/s]

Step 800,     Loss = 7.362458229064941,    PPL = 1575.7052503938983


851it [02:10,  6.98it/s]

Step 850,     Loss = 7.361689567565918,    PPL = 1574.4945318101038


901it [02:18,  7.07it/s]

Step 900,     Loss = 7.288881778717041,    PPL = 1463.9327808058515


951it [02:25,  7.01it/s]

Step 950,     Loss = 7.554792404174805,    PPL = 1909.8737209608198


1001it [02:32,  6.99it/s]

Step 1000,     Loss = 7.285543441772461,    PPL = 1459.0538282442196


1051it [02:39,  7.04it/s]

Step 1050,     Loss = 7.394063949584961,    PPL = 1626.3019103353256


1101it [02:46,  7.00it/s]

Step 1100,     Loss = 7.130098342895508,    PPL = 1248.9997911296603


1151it [02:54,  6.97it/s]

Step 1150,     Loss = 7.144039154052734,    PPL = 1266.5337965107585


1201it [03:01,  7.01it/s]

Step 1200,     Loss = 6.9283766746521,    PPL = 1020.8354857292298


1251it [03:08,  6.98it/s]

Step 1250,     Loss = 7.128181457519531,    PPL = 1246.6078949232838


1301it [03:15,  6.98it/s]

Step 1300,     Loss = 7.355594635009766,    PPL = 1564.9272793251803


1351it [03:23,  6.98it/s]

Step 1350,     Loss = 6.894595623016357,    PPL = 986.9265537291365


1401it [03:30,  7.04it/s]

Step 1400,     Loss = 7.0151448249816895,    PPL = 1113.367878018597


1451it [03:37,  6.99it/s]

Step 1450,     Loss = 7.007061958312988,    PPL = 1104.4049457980927


1501it [03:44,  7.01it/s]

Step 1500,     Loss = 7.159383773803711,    PPL = 1286.1181489689352


1551it [03:51,  7.00it/s]

Step 1550,     Loss = 7.290343761444092,    PPL = 1466.0745905078568


1601it [03:59,  6.97it/s]

Step 1600,     Loss = 7.043037414550781,    PPL = 1144.8597444449242


1651it [04:06,  7.04it/s]

Step 1650,     Loss = 7.004096031188965,    PPL = 1101.1342139871847


1701it [04:13,  7.00it/s]

Step 1700,     Loss = 7.05507755279541,    PPL = 1158.7273303386626


1751it [04:20,  6.99it/s]

Step 1750,     Loss = 6.775120735168457,    PPL = 875.7850947124934


1801it [04:28,  7.02it/s]

Step 1800,     Loss = 6.895357608795166,    PPL = 987.6788643163903


1851it [04:35,  7.00it/s]

Step 1850,     Loss = 7.031622886657715,    PPL = 1131.8660107071257


1901it [04:42,  7.01it/s]

Step 1900,     Loss = 6.830391883850098,    PPL = 925.5534508681509


1951it [04:49,  6.99it/s]

Step 1950,     Loss = 7.018657684326172,    PPL = 1117.2858604042085


2001it [04:56,  6.99it/s]

Step 2000,     Loss = 6.692293643951416,    PPL = 806.1691983213638


2051it [05:04,  6.98it/s]

Step 2050,     Loss = 7.018410682678223,    PPL = 1117.0099230353537


2101it [05:11,  6.99it/s]

Step 2100,     Loss = 7.529336452484131,    PPL = 1861.8696545954258


2151it [05:18,  6.98it/s]

Step 2150,     Loss = 7.177240371704102,    PPL = 1309.2901141289087


2201it [05:25,  6.98it/s]

Step 2200,     Loss = 6.891780376434326,    PPL = 984.1520194535929


2251it [05:33,  6.97it/s]

Step 2250,     Loss = 6.922054767608643,    PPL = 1014.40221537663


2301it [05:40,  6.97it/s]

Step 2300,     Loss = 7.193493843078613,    PPL = 1330.7445053788558


2351it [05:47,  7.00it/s]

Step 2350,     Loss = 6.877767562866211,    PPL = 970.4574544621597


2401it [05:54,  7.01it/s]

Step 2400,     Loss = 6.84826135635376,    PPL = 942.2412599241244


2451it [06:01,  6.99it/s]

Step 2450,     Loss = 6.892085552215576,    PPL = 984.4524046478102


2501it [06:09,  6.98it/s]

Step 2500,     Loss = 6.850586414337158,    PPL = 944.4345742912693


2551it [06:16,  7.01it/s]

Step 2550,     Loss = 7.029907703399658,    PPL = 1129.9263170157947


2601it [06:23,  7.00it/s]

Step 2600,     Loss = 6.904545307159424,    PPL = 996.7951746287107


2651it [06:30,  7.02it/s]

Step 2650,     Loss = 6.946260929107666,    PPL = 1039.2566002716437


2701it [06:38,  6.98it/s]

Step 2700,     Loss = 6.767538070678711,    PPL = 869.169424077861


2751it [06:45,  7.01it/s]

Step 2750,     Loss = 6.938546657562256,    PPL = 1031.2703363611727


2801it [06:52,  6.99it/s]

Step 2800,     Loss = 7.10516357421875,    PPL = 1218.2413413129684


2851it [06:59,  7.00it/s]

Step 2850,     Loss = 6.7348127365112305,    PPL = 841.1859459220785


2901it [07:06,  7.01it/s]

Step 2900,     Loss = 6.897174835205078,    PPL = 989.4753322332349


2951it [07:14,  6.97it/s]

Step 2950,     Loss = 6.7611870765686035,    PPL = 863.6668261639804


3001it [07:21,  6.98it/s]

Step 3000,     Loss = 6.906805038452148,    PPL = 999.0502108055733


3051it [07:28,  7.01it/s]

Step 3050,     Loss = 6.642946720123291,    PPL = 767.3528388182543


3101it [07:35,  6.97it/s]

Step 3100,     Loss = 6.537876129150391,    PPL = 690.8178110416395


3151it [07:43,  7.01it/s]

Step 3150,     Loss = 6.971925258636475,    PPL = 1066.2736281811592


3201it [07:50,  7.00it/s]

Step 3200,     Loss = 6.7950758934021,    PPL = 893.4370630068428


3251it [07:57,  6.98it/s]

Step 3250,     Loss = 7.124118328094482,    PPL = 1241.5530419191339


3301it [08:04,  6.99it/s]

Step 3300,     Loss = 6.974653720855713,    PPL = 1069.1868880434415


3351it [08:12,  6.96it/s]

Step 3350,     Loss = 6.971702575683594,    PPL = 1066.0362136561218


3401it [08:19,  6.99it/s]

Step 3400,     Loss = 6.638258457183838,    PPL = 763.7637069136191


3451it [08:26,  7.01it/s]

Step 3450,     Loss = 6.828810214996338,    PPL = 924.0906889098842


3501it [08:33,  6.99it/s]

Step 3500,     Loss = 6.79675817489624,    PPL = 894.9413405985281


3551it [08:40,  6.97it/s]

Step 3550,     Loss = 6.781347751617432,    PPL = 881.2556378069364


3601it [08:48,  6.98it/s]

Step 3600,     Loss = 6.661603927612305,    PPL = 781.8038888810196


3651it [08:55,  6.97it/s]

Step 3650,     Loss = 6.913346290588379,    PPL = 1005.606670481014


3701it [09:02,  6.98it/s]

Step 3700,     Loss = 6.719460487365723,    PPL = 828.370474564391


3751it [09:09,  6.98it/s]

Step 3750,     Loss = 6.820379257202148,    PPL = 916.3324698774331


3801it [09:17,  6.99it/s]

Step 3800,     Loss = 6.683544635772705,    PPL = 799.1467817958996


3851it [09:24,  7.00it/s]

Step 3850,     Loss = 6.546940803527832,    PPL = 697.1083171744485


3901it [09:31,  6.98it/s]

Step 3900,     Loss = 6.60114049911499,    PPL = 735.9340429197481


3951it [09:38,  6.99it/s]

Step 3950,     Loss = 6.775287628173828,    PPL = 875.9312693164295


4001it [09:46,  6.98it/s]

Step 4000,     Loss = 6.66154670715332,    PPL = 781.7591549835213


4051it [09:53,  6.99it/s]

Step 4050,     Loss = 6.745624542236328,    PPL = 850.3300278922445


4101it [10:00,  7.02it/s]

Step 4100,     Loss = 6.718020915985107,    PPL = 827.1788340682315


4151it [10:07,  6.98it/s]

Step 4150,     Loss = 6.5049238204956055,    PPL = 668.4247471489003


4201it [10:14,  6.96it/s]

Step 4200,     Loss = 6.898067474365234,    PPL = 990.3589709893995


4251it [10:22,  6.99it/s]

Step 4250,     Loss = 6.763711452484131,    PPL = 865.849800063111


4301it [10:29,  7.01it/s]

Step 4300,     Loss = 6.809741973876953,    PPL = 906.6368407751413


4351it [10:36,  7.01it/s]

Step 4350,     Loss = 6.6021318435668945,    PPL = 736.6639687943684


4401it [10:43,  7.00it/s]

Step 4400,     Loss = 6.65247106552124,    PPL = 774.6962875561524


4451it [10:51,  7.00it/s]

Step 4450,     Loss = 6.999521255493164,    PPL = 1096.108276979927


4501it [10:58,  7.00it/s]

Step 4500,     Loss = 6.737441539764404,    PPL = 843.4001673743869


4551it [11:05,  6.98it/s]

Step 4550,     Loss = 6.4116339683532715,    PPL = 608.8877720339037


4601it [11:12,  6.98it/s]

Step 4600,     Loss = 6.596717834472656,    PPL = 732.6864402713203


4651it [11:19,  6.98it/s]

Step 4650,     Loss = 6.8293986320495605,    PPL = 924.6345996374434


4701it [11:27,  6.98it/s]

Step 4700,     Loss = 6.886066913604736,    PPL = 978.5451360840632


4751it [11:34,  7.00it/s]

Step 4750,     Loss = 6.649123191833496,    PPL = 772.1070388961934


4801it [11:41,  6.98it/s]

Step 4800,     Loss = 6.673430919647217,    PPL = 791.1051719279822


4851it [11:48,  6.99it/s]

Step 4850,     Loss = 6.517601490020752,    PPL = 676.9527586307778


4901it [11:56,  6.98it/s]

Step 4900,     Loss = 6.704667091369629,    PPL = 816.2062589703834


4951it [12:03,  6.97it/s]

Step 4950,     Loss = 6.712641716003418,    PPL = 822.7412198069196


5001it [12:10,  6.99it/s]

Step 5000,     Loss = 6.433814525604248,    PPL = 622.5441351122133


5051it [12:17,  6.99it/s]

Step 5050,     Loss = 6.731831073760986,    PPL = 838.6815486105876


5101it [12:25,  6.98it/s]

Step 5100,     Loss = 6.77138614654541,    PPL = 872.5204974198189


5151it [12:32,  6.97it/s]

Step 5150,     Loss = 6.757906436920166,    PPL = 860.8380891015535


5201it [12:39,  7.00it/s]

Step 5200,     Loss = 6.814598560333252,    PPL = 911.0507104711078


5251it [12:46,  7.00it/s]

Step 5250,     Loss = 6.74423360824585,    PPL = 849.1480971377116


5301it [12:54,  6.99it/s]

Step 5300,     Loss = 6.938809871673584,    PPL = 1031.5418169934965


5351it [13:01,  7.01it/s]

Step 5350,     Loss = 6.786563396453857,    PPL = 885.863961466924


5401it [13:08,  7.01it/s]

Step 5400,     Loss = 6.758600234985352,    PPL = 861.4355441348175


5451it [13:15,  6.96it/s]

Step 5450,     Loss = 6.730167388916016,    PPL = 837.2874068562261


5501it [13:22,  7.00it/s]

Step 5500,     Loss = 6.686473846435547,    PPL = 801.4910828699346


5551it [13:30,  6.98it/s]

Step 5550,     Loss = 6.649974822998047,    PPL = 772.7648693876012


5601it [13:37,  6.98it/s]

Step 5600,     Loss = 6.696971893310547,    PPL = 809.9494945463244


5651it [13:44,  6.99it/s]

Step 5650,     Loss = 6.594320297241211,    PPL = 730.9319013784055


5701it [13:51,  6.98it/s]

Step 5700,     Loss = 6.78071403503418,    PPL = 880.6973484124816


5751it [13:59,  6.98it/s]

Step 5750,     Loss = 6.670015811920166,    PPL = 788.408070609546


5801it [14:06,  7.00it/s]

Step 5800,     Loss = 6.759057998657227,    PPL = 861.8299683022243


5851it [14:13,  6.98it/s]

Step 5850,     Loss = 6.619267463684082,    PPL = 749.3959362579948


5901it [14:20,  6.99it/s]

Step 5900,     Loss = 6.840296745300293,    PPL = 934.7664811367023


5951it [14:28,  6.98it/s]

Step 5950,     Loss = 6.64309024810791,    PPL = 767.4629833289225


6001it [14:35,  7.00it/s]

Step 6000,     Loss = 6.31038761138916,    PPL = 550.2581938241766


6051it [14:42,  6.99it/s]

Step 6050,     Loss = 6.453833103179932,    PPL = 635.132159973449


6101it [14:49,  6.92it/s]

Step 6100,     Loss = 6.785928726196289,    PPL = 885.3019083363984


6151it [14:56,  7.02it/s]

Step 6150,     Loss = 6.435545921325684,    PPL = 623.6229390128833


6201it [15:04,  6.99it/s]

Step 6200,     Loss = 6.755885124206543,    PPL = 859.0998235086895


6251it [15:11,  7.01it/s]

Step 6250,     Loss = 6.580691814422607,    PPL = 721.0379812166137


6301it [15:18,  7.01it/s]

Step 6300,     Loss = 6.654311180114746,    PPL = 776.1231298744957


6351it [15:25,  6.99it/s]

Step 6350,     Loss = 6.420366287231445,    PPL = 614.2280568206271


6401it [15:33,  7.00it/s]

Step 6400,     Loss = 6.466979503631592,    PPL = 643.5369872520871


6451it [15:40,  6.98it/s]

Step 6450,     Loss = 6.584192276000977,    PPL = 723.5663696479435


6501it [15:47,  6.98it/s]

Step 6500,     Loss = 6.5188188552856445,    PPL = 677.7773592233308


6551it [15:54,  6.99it/s]

Step 6550,     Loss = 6.673304080963135,    PPL = 791.0048355524057


6601it [16:02,  6.97it/s]

Step 6600,     Loss = 6.580048561096191,    PPL = 720.5743202784319


6651it [16:09,  6.97it/s]

Step 6650,     Loss = 6.441833972930908,    PPL = 627.55666701645


6701it [16:16,  6.97it/s]

Step 6700,     Loss = 6.572479724884033,    PPL = 715.1409991666736


6751it [16:23,  6.99it/s]

Step 6750,     Loss = 6.323309421539307,    PPL = 557.41466343121


6801it [16:31,  6.97it/s]

Step 6800,     Loss = 6.3969244956970215,    PPL = 599.9969042965113


6851it [16:38,  6.99it/s]

Step 6850,     Loss = 6.654811859130859,    PPL = 776.5118157348514


6901it [16:45,  7.01it/s]

Step 6900,     Loss = 6.465979099273682,    PPL = 642.8935119669698


6951it [16:52,  7.00it/s]

Step 6950,     Loss = 6.591014385223389,    PPL = 728.5194946207849


7001it [16:59,  6.96it/s]

Step 7000,     Loss = 6.759298324584961,    PPL = 862.0371132790632


7051it [17:07,  6.97it/s]

Step 7050,     Loss = 6.723628044128418,    PPL = 831.8299593300654


7101it [17:14,  6.98it/s]

Step 7100,     Loss = 6.711635112762451,    PPL = 821.9134625100676


7151it [17:21,  6.98it/s]

Step 7150,     Loss = 6.603589057922363,    PPL = 737.7382286281809


7201it [17:28,  7.00it/s]

Step 7200,     Loss = 6.663281440734863,    PPL = 783.1164757969657


7251it [17:36,  6.98it/s]

Step 7250,     Loss = 6.842467308044434,    PPL = 936.7976540317729


7301it [17:43,  7.01it/s]

Step 7300,     Loss = 6.572461128234863,    PPL = 715.1277000640649


7351it [17:50,  6.95it/s]

Step 7350,     Loss = 6.498295307159424,    PPL = 664.008736758983


7401it [17:57,  6.98it/s]

Step 7400,     Loss = 6.665436267852783,    PPL = 784.8057758366131


7451it [18:05,  7.00it/s]

Step 7450,     Loss = 6.5501508712768555,    PPL = 699.3496776357706


7501it [18:12,  6.96it/s]

Step 7500,     Loss = 6.623642444610596,    PPL = 752.6817115467035


7551it [18:19,  6.98it/s]

Step 7550,     Loss = 6.625565528869629,    PPL = 754.1305745945797


7601it [18:26,  6.99it/s]

Step 7600,     Loss = 6.484396457672119,    PPL = 654.8436191050469


7651it [18:34,  6.96it/s]

Step 7650,     Loss = 6.5109663009643555,    PPL = 672.4759178621224


7701it [18:41,  7.01it/s]

Step 7700,     Loss = 6.47766637802124,    PPL = 650.4512664102579


7751it [18:48,  6.98it/s]

Step 7750,     Loss = 6.553427219390869,    PPL = 701.6447483053307


7801it [18:55,  6.95it/s]

Step 7800,     Loss = 6.720142364501953,    PPL = 828.9355140731926


7851it [19:02,  6.97it/s]

Step 7850,     Loss = 6.353622913360596,    PPL = 574.570562166236


7901it [19:10,  6.97it/s]

Step 7900,     Loss = 6.553678512573242,    PPL = 701.8210890027073


7951it [19:17,  6.99it/s]

Step 7950,     Loss = 6.60200834274292,    PPL = 736.5729958049651


8001it [19:24,  7.00it/s]

Step 8000,     Loss = 6.653338432312012,    PPL = 775.3685248849347


8051it [19:31,  6.99it/s]

Step 8050,     Loss = 6.451096534729004,    PPL = 633.3964533654653


8101it [19:39,  6.99it/s]

Step 8100,     Loss = 6.6837921142578125,    PPL = 799.3445779049682


8151it [19:46,  7.01it/s]

Step 8150,     Loss = 6.421281337738037,    PPL = 614.7903637455662


8201it [19:53,  6.98it/s]

Step 8200,     Loss = 6.555420875549316,    PPL = 703.0449820073152


8251it [20:00,  6.97it/s]

Step 8250,     Loss = 6.4449639320373535,    PPL = 629.5239709054689


8301it [20:08,  7.00it/s]

Step 8300,     Loss = 6.450462341308594,    PPL = 632.9948848517437


8351it [20:15,  6.93it/s]

Step 8350,     Loss = 6.59967041015625,    PPL = 734.8529492555218


8401it [20:22,  6.99it/s]

Step 8400,     Loss = 6.525042533874512,    PPL = 682.0087815097112


8451it [20:29,  6.98it/s]

Step 8450,     Loss = 6.768516540527344,    PPL = 870.0202963612262


8501it [20:37,  6.97it/s]

Step 8500,     Loss = 6.744709014892578,    PPL = 849.551883760965


8551it [20:44,  6.98it/s]

Step 8550,     Loss = 6.5792436599731445,    PPL = 719.9945625539233


8601it [20:51,  6.96it/s]

Step 8600,     Loss = 6.495108604431152,    PPL = 661.8961062564764


8651it [20:58,  6.97it/s]

Step 8650,     Loss = 6.457756519317627,    PPL = 637.6289424950896


8701it [21:05,  7.00it/s]

Step 8700,     Loss = 6.459207534790039,    PPL = 638.5548235277104


8751it [21:13,  6.97it/s]

Step 8750,     Loss = 6.614303112030029,    PPL = 745.6848903921248


8801it [21:20,  6.96it/s]

Step 8800,     Loss = 6.6737236976623535,    PPL = 791.3368240396516


8851it [21:27,  6.97it/s]

Step 8850,     Loss = 6.259464740753174,    PPL = 522.9389572705793


8901it [21:34,  6.99it/s]

Step 8900,     Loss = 6.667723178863525,    PPL = 786.6026106241832


8951it [21:42,  6.99it/s]

Step 8950,     Loss = 6.464132785797119,    PPL = 641.7076241090758


9001it [21:49,  6.98it/s]

Step 9000,     Loss = 6.512393474578857,    PPL = 673.4363429321244


9051it [21:56,  6.98it/s]

Step 9050,     Loss = 6.524695873260498,    PPL = 681.7723969017343


9101it [22:03,  7.00it/s]

Step 9100,     Loss = 6.299002170562744,    PPL = 544.0287912583634


9151it [22:11,  7.00it/s]

Step 9150,     Loss = 6.335544109344482,    PPL = 564.2763475053538


9201it [22:18,  6.97it/s]

Step 9200,     Loss = 6.400162220001221,    PPL = 601.9426770941636


9251it [22:25,  7.00it/s]

Step 9250,     Loss = 6.532907009124756,    PPL = 687.393569203063


9301it [22:32,  6.93it/s]

Step 9300,     Loss = 6.5546770095825195,    PPL = 702.5222052341035


9351it [22:40,  6.98it/s]

Step 9350,     Loss = 6.3115763664245605,    PPL = 550.9127049724908


9401it [22:47,  6.98it/s]

Step 9400,     Loss = 6.489689826965332,    PPL = 658.3191386993586


9451it [22:54,  6.96it/s]

Step 9450,     Loss = 6.558924198150635,    PPL = 705.5122947552832


9501it [23:01,  7.00it/s]

Step 9500,     Loss = 6.524106025695801,    PPL = 681.3703736916057


9551it [23:09,  7.00it/s]

Step 9550,     Loss = 6.376307964324951,    PPL = 587.7536892623584


9601it [23:16,  6.97it/s]

Step 9600,     Loss = 6.419780254364014,    PPL = 613.8682044441904


9651it [23:23,  6.98it/s]

Step 9650,     Loss = 6.43648624420166,    PPL = 624.2096217207774


9701it [23:30,  6.99it/s]

Step 9700,     Loss = 6.684141159057617,    PPL = 799.6236336717869


9751it [23:37,  6.99it/s]

Step 9750,     Loss = 6.299116134643555,    PPL = 544.0907945324996


9801it [23:45,  6.98it/s]

Step 9800,     Loss = 6.459571361541748,    PPL = 638.7871891227892


9851it [23:52,  6.97it/s]

Step 9850,     Loss = 6.147375583648682,    PPL = 467.48888995708796


9901it [23:59,  6.98it/s]

Step 9900,     Loss = 6.273338794708252,    PPL = 530.2448042454057


9951it [24:06,  6.97it/s]

Step 9950,     Loss = 6.769112586975098,    PPL = 870.5390234457068


10001it [24:14,  6.99it/s]

Step 10000,     Loss = 6.216213226318359,    PPL = 500.80320840173863


10051it [24:21,  7.00it/s]

Step 10050,     Loss = 6.385385036468506,    PPL = 593.1130587964519


10101it [24:28,  6.96it/s]

Step 10100,     Loss = 6.621214866638184,    PPL = 750.8567340373526


10151it [24:35,  6.98it/s]

Step 10150,     Loss = 6.5379638671875,    PPL = 690.8784246994034


10201it [24:43,  6.97it/s]

Step 10200,     Loss = 6.619084358215332,    PPL = 749.2587303257685


10251it [24:50,  6.95it/s]

Step 10250,     Loss = 6.418307304382324,    PPL = 612.9646728754554


10301it [24:57,  6.97it/s]

Step 10300,     Loss = 6.587907314300537,    PPL = 726.259445766546


10351it [25:04,  6.98it/s]

Step 10350,     Loss = 6.470461845397949,    PPL = 645.7819095056285


10401it [25:12,  6.99it/s]

Step 10400,     Loss = 6.209042072296143,    PPL = 497.22471774873384


10451it [25:19,  6.99it/s]

Step 10450,     Loss = 6.217552185058594,    PPL = 501.47421235782286


10501it [25:26,  6.99it/s]

Step 10500,     Loss = 6.415492057800293,    PPL = 611.2414529578596


10551it [25:33,  7.01it/s]

Step 10550,     Loss = 6.625034809112549,    PPL = 753.7304487859519


10601it [25:40,  6.98it/s]

Step 10600,     Loss = 6.704381942749023,    PPL = 815.9735520611123


10651it [25:48,  6.98it/s]

Step 10650,     Loss = 6.394150733947754,    PPL = 598.3349618154842


10701it [25:55,  6.99it/s]

Step 10700,     Loss = 6.351312160491943,    PPL = 573.2444043930002


10751it [26:02,  6.98it/s]

Step 10750,     Loss = 6.474931240081787,    PPL = 648.6746232633576


10801it [26:09,  7.00it/s]

Step 10800,     Loss = 6.466235160827637,    PPL = 643.0581533569372


10851it [26:17,  6.96it/s]

Step 10850,     Loss = 6.669271469116211,    PPL = 787.8214430886795


10901it [26:24,  6.96it/s]

Step 10900,     Loss = 6.412888050079346,    PPL = 609.6518460675434


10951it [26:31,  6.97it/s]

Step 10950,     Loss = 6.404336452484131,    PPL = 604.4605772640434


11001it [26:38,  6.98it/s]

Step 11000,     Loss = 6.373865604400635,    PPL = 586.3199347912756


11051it [26:46,  6.99it/s]

Step 11050,     Loss = 6.291687965393066,    PPL = 540.0641697632003


11101it [26:53,  6.99it/s]

Step 11100,     Loss = 6.350444793701172,    PPL = 572.7474068043945


11151it [27:00,  6.97it/s]

Step 11150,     Loss = 6.347784519195557,    PPL = 571.2257663679404


11201it [27:07,  6.97it/s]

Step 11200,     Loss = 6.348423480987549,    PPL = 571.590874439883


11251it [27:15,  6.98it/s]

Step 11250,     Loss = 6.508494853973389,    PPL = 670.8159813458931


11301it [27:22,  6.99it/s]

Step 11300,     Loss = 6.468331336975098,    PPL = 644.4075302912144


11351it [27:29,  7.01it/s]

Step 11350,     Loss = 6.248134136199951,    PPL = 517.0471844475504


11401it [27:36,  6.97it/s]

Step 11400,     Loss = 6.4476141929626465,    PPL = 631.1945864928505


11451it [27:44,  6.98it/s]

Step 11450,     Loss = 6.5340986251831055,    PPL = 688.2131666442856


11501it [27:51,  6.98it/s]

Step 11500,     Loss = 6.412240982055664,    PPL = 609.2574874545584


11551it [27:58,  6.99it/s]

Step 11550,     Loss = 6.506774425506592,    PPL = 669.6628826319485


11601it [28:05,  6.95it/s]

Step 11600,     Loss = 6.373196125030518,    PPL = 585.9275370563948


11651it [28:12,  7.00it/s]

Step 11650,     Loss = 6.398258686065674,    PPL = 600.797948641444


11701it [28:20,  6.99it/s]

Step 11700,     Loss = 6.566191673278809,    PPL = 710.6582642635532


11751it [28:27,  6.98it/s]

Step 11750,     Loss = 6.656558990478516,    PPL = 777.8696696991601


11801it [28:34,  6.96it/s]

Step 11800,     Loss = 6.484080791473389,    PPL = 654.6369397316033


11851it [28:41,  6.96it/s]

Step 11850,     Loss = 6.313099384307861,    PPL = 551.7523941428021


11901it [28:49,  6.97it/s]

Step 11900,     Loss = 6.459231376647949,    PPL = 638.5700480425704


11951it [28:56,  6.97it/s]

Step 11950,     Loss = 6.250553131103516,    PPL = 518.2994329323514


12001it [29:03,  6.98it/s]

Step 12000,     Loss = 6.264420509338379,    PPL = 525.5369539449989


12051it [29:10,  7.00it/s]

Step 12050,     Loss = 6.424948692321777,    PPL = 617.0491573710312


12101it [29:18,  6.99it/s]

Step 12100,     Loss = 6.383725643157959,    PPL = 592.1296670967456


12151it [29:25,  6.97it/s]

Step 12150,     Loss = 6.694292068481445,    PPL = 807.7818774947397


12201it [29:32,  6.99it/s]

Step 12200,     Loss = 6.351677894592285,    PPL = 573.4540977631875


12251it [29:39,  6.98it/s]

Step 12250,     Loss = 6.292870998382568,    PPL = 540.7034615695657


12301it [29:47,  6.99it/s]

Step 12300,     Loss = 6.438634395599365,    PPL = 625.5519597486021


12351it [29:54,  6.99it/s]

Step 12350,     Loss = 6.732190132141113,    PPL = 838.9827383179824


12401it [30:01,  6.99it/s]

Step 12400,     Loss = 6.228023529052734,    PPL = 506.75291063878564


12451it [30:08,  6.98it/s]

Step 12450,     Loss = 6.31931734085083,    PPL = 555.1938548906604


12501it [30:16,  6.98it/s]

Step 12500,     Loss = 6.148903846740723,    PPL = 468.20388218250264


12551it [30:23,  6.97it/s]

Step 12550,     Loss = 6.362841606140137,    PPL = 579.8918415937538


12601it [30:30,  6.98it/s]

Step 12600,     Loss = 6.429007053375244,    PPL = 619.5584540111881


12651it [30:37,  6.99it/s]

Step 12650,     Loss = 6.251527309417725,    PPL = 518.8045950192051


12701it [30:44,  6.96it/s]

Step 12700,     Loss = 6.155604839324951,    PPL = 471.35184839453814


12751it [30:52,  6.99it/s]

Step 12750,     Loss = 6.224195957183838,    PPL = 504.81698476474276


12801it [30:59,  6.99it/s]

Step 12800,     Loss = 6.381340980529785,    PPL = 590.7193198780814


12851it [31:06,  6.98it/s]

Step 12850,     Loss = 6.5495147705078125,    PPL = 698.9049622248991


12901it [31:13,  7.00it/s]

Step 12900,     Loss = 6.337955951690674,    PPL = 565.6389356085526


12951it [31:21,  7.00it/s]

Step 12950,     Loss = 6.215775966644287,    PPL = 500.584275222872


13001it [31:28,  6.98it/s]

Step 13000,     Loss = 6.3980183601379395,    PPL = 600.6535786656749


13051it [31:35,  6.98it/s]

Step 13050,     Loss = 6.173199653625488,    PPL = 479.71858639291406


13101it [31:42,  6.99it/s]

Step 13100,     Loss = 6.288825511932373,    PPL = 538.520471647514


13151it [31:50,  6.99it/s]

Step 13150,     Loss = 6.263402938842773,    PPL = 525.0024550375938


13201it [31:57,  6.97it/s]

Step 13200,     Loss = 6.436060905456543,    PPL = 623.9441776394127


13251it [32:04,  6.98it/s]

Step 13250,     Loss = 6.386341094970703,    PPL = 593.6803807323453


13301it [32:11,  6.98it/s]

Step 13300,     Loss = 6.417142868041992,    PPL = 612.2513299368425


13351it [32:19,  7.00it/s]

Step 13350,     Loss = 6.3739333152771,    PPL = 586.3596363720484


13401it [32:26,  6.96it/s]

Step 13400,     Loss = 6.292414665222168,    PPL = 540.4567769395774


13451it [32:33,  6.99it/s]

Step 13450,     Loss = 6.1586127281188965,    PPL = 472.77175672997566


13501it [32:40,  6.98it/s]

Step 13500,     Loss = 6.159623146057129,    PPL = 473.2496952118106


13551it [32:48,  6.98it/s]

Step 13550,     Loss = 6.6455254554748535,    PPL = 769.3341923059987


13601it [32:55,  6.99it/s]

Step 13600,     Loss = 6.2585225105285645,    PPL = 522.44646043855


13651it [33:02,  6.96it/s]

Step 13650,     Loss = 6.223664283752441,    PPL = 504.5486583235663


13701it [33:09,  6.98it/s]

Step 13700,     Loss = 6.278473377227783,    PPL = 532.9743915972209


13751it [33:17,  6.96it/s]

Step 13750,     Loss = 6.30907678604126,    PPL = 549.5373739735008


13801it [33:24,  7.00it/s]

Step 13800,     Loss = 6.2339677810668945,    PPL = 509.7741482513515


13851it [33:31,  6.96it/s]

Step 13850,     Loss = 6.3351898193359375,    PPL = 564.0764654435739


13901it [33:38,  6.98it/s]

Step 13900,     Loss = 6.336990833282471,    PPL = 565.0932904078677


13951it [33:46,  6.99it/s]

Step 13950,     Loss = 6.1868696212768555,    PPL = 486.32135091926875


14001it [33:53,  6.98it/s]

Step 14000,     Loss = 6.072385787963867,    PPL = 433.71419832245334


14051it [34:00,  6.96it/s]

Step 14050,     Loss = 6.035961627960205,    PPL = 418.20076989255267


14101it [34:07,  6.93it/s]

Step 14100,     Loss = 6.197922706604004,    PPL = 491.7265191710224


14151it [34:14,  6.98it/s]

Step 14150,     Loss = 6.39785623550415,    PPL = 600.5562058176815


14201it [34:22,  6.96it/s]

Step 14200,     Loss = 6.096956729888916,    PPL = 444.5029670184906


14251it [34:29,  6.99it/s]

Step 14250,     Loss = 6.241157531738281,    PPL = 513.4525046634616


14301it [34:36,  6.99it/s]

Step 14300,     Loss = 6.148773670196533,    PPL = 468.14293698604763


14351it [34:43,  6.97it/s]

Step 14350,     Loss = 6.060611248016357,    PPL = 428.63736053733265


14401it [34:51,  6.99it/s]

Step 14400,     Loss = 6.221643924713135,    PPL = 503.53031793395064


14451it [34:58,  6.95it/s]

Step 14450,     Loss = 6.253897666931152,    PPL = 520.0358060176571


14501it [35:05,  6.98it/s]

Step 14500,     Loss = 6.44489049911499,    PPL = 629.4777448178667


14551it [35:12,  6.99it/s]

Step 14550,     Loss = 6.118808746337891,    PPL = 454.32315788421676


14601it [35:20,  6.96it/s]

Step 14600,     Loss = 6.279346466064453,    PPL = 533.4399287868445


14651it [35:27,  6.96it/s]

Step 14650,     Loss = 6.424263000488281,    PPL = 616.6261968298229


14701it [35:34,  6.96it/s]

Step 14700,     Loss = 6.637957572937012,    PPL = 763.5339370147162


14751it [35:41,  6.99it/s]

Step 14750,     Loss = 6.452084064483643,    PPL = 634.0222601602035


14801it [35:49,  6.96it/s]

Step 14800,     Loss = 6.287561893463135,    PPL = 537.8404169887795


14851it [35:56,  6.97it/s]

Step 14850,     Loss = 6.257625579833984,    PPL = 521.9780722591764


14901it [36:03,  7.00it/s]

Step 14900,     Loss = 6.28740930557251,    PPL = 537.7583553150264


14951it [36:10,  6.97it/s]

Step 14950,     Loss = 6.078851699829102,    PPL = 436.52764204103323


15001it [36:18,  6.97it/s]

Step 15000,     Loss = 6.314068794250488,    PPL = 552.2875277395591


15051it [36:25,  6.97it/s]

Step 15050,     Loss = 6.241661548614502,    PPL = 513.711358618851


15101it [36:32,  6.98it/s]

Step 15100,     Loss = 6.109116554260254,    PPL = 449.9410410358866


15151it [36:39,  6.97it/s]

Step 15150,     Loss = 6.175770282745361,    PPL = 480.9533513418493


15201it [36:47,  6.97it/s]

Step 15200,     Loss = 5.919862270355225,    PPL = 372.36042527518134


15251it [36:54,  6.99it/s]

Step 15250,     Loss = 6.473044395446777,    PPL = 647.4518290047604


15301it [37:01,  6.96it/s]

Step 15300,     Loss = 6.220778465270996,    PPL = 503.0947213886524


15351it [37:08,  6.97it/s]

Step 15350,     Loss = 6.429588794708252,    PPL = 619.9189816288417


15401it [37:16,  6.96it/s]

Step 15400,     Loss = 6.284206867218018,    PPL = 536.0389719116391


15451it [37:23,  6.98it/s]

Step 15450,     Loss = 6.081449508666992,    PPL = 437.6631316614152


15501it [37:30,  6.99it/s]

Step 15500,     Loss = 6.227173328399658,    PPL = 506.3222520822468


15551it [37:37,  6.95it/s]

Step 15550,     Loss = 6.344620704650879,    PPL = 569.4213698730737


15601it [37:45,  6.96it/s]

Step 15600,     Loss = 6.542874813079834,    PPL = 694.2796360079433


15651it [37:52,  6.97it/s]

Step 15650,     Loss = 6.169243335723877,    PPL = 477.8244166014588


15701it [37:59,  6.98it/s]

Step 15700,     Loss = 6.097673416137695,    PPL = 444.8216503668404


15751it [38:06,  6.99it/s]

Step 15750,     Loss = 5.991401195526123,    PPL = 399.97466016992405


15801it [38:13,  6.97it/s]

Step 15800,     Loss = 6.141099452972412,    PPL = 464.56405651317795


15851it [38:21,  6.97it/s]

Step 15850,     Loss = 6.292397975921631,    PPL = 540.4477571692668


15901it [38:28,  6.97it/s]

Step 15900,     Loss = 6.104249000549316,    PPL = 447.7562504583306


15951it [38:35,  6.98it/s]

Step 15950,     Loss = 6.098170280456543,    PPL = 445.0427212897373


16001it [38:42,  6.98it/s]

Step 16000,     Loss = 6.360348224639893,    PPL = 578.4477510862129


16051it [38:50,  7.00it/s]

Step 16050,     Loss = 6.366125106811523,    PPL = 581.7990462853774


16101it [38:57,  6.98it/s]

Step 16100,     Loss = 6.3083720207214355,    PPL = 549.1502155343227


16151it [39:04,  7.00it/s]

Step 16150,     Loss = 6.265128135681152,    PPL = 525.9089693461368


16201it [39:11,  6.98it/s]

Step 16200,     Loss = 6.481040954589844,    PPL = 652.6499717761807


16251it [39:19,  6.97it/s]

Step 16250,     Loss = 5.99465274810791,    PPL = 401.27731548727553


16301it [39:26,  6.98it/s]

Step 16300,     Loss = 6.553173542022705,    PPL = 701.4667794864797


16351it [39:33,  6.98it/s]

Step 16350,     Loss = 6.2335944175720215,    PPL = 509.5838527206772


16401it [39:40,  6.99it/s]

Step 16400,     Loss = 6.4483513832092285,    PPL = 631.6600685390401


16451it [39:48,  6.95it/s]

Step 16450,     Loss = 6.088568687438965,    PPL = 440.7900510800706


16501it [39:55,  6.97it/s]

Step 16500,     Loss = 6.0753374099731445,    PPL = 434.99624983090564


16551it [40:02,  6.98it/s]

Step 16550,     Loss = 6.341942310333252,    PPL = 567.8982755457965


16601it [40:09,  6.98it/s]

Step 16600,     Loss = 6.264420509338379,    PPL = 525.5369539449989


16651it [40:17,  6.97it/s]

Step 16650,     Loss = 6.291442394256592,    PPL = 539.9315618742638


16701it [40:24,  6.99it/s]

Step 16700,     Loss = 6.256809711456299,    PPL = 521.5523805339967


16751it [40:31,  6.99it/s]

Step 16750,     Loss = 6.155694484710693,    PPL = 471.3941048068257


16801it [40:38,  6.89it/s]

Step 16800,     Loss = 6.404858112335205,    PPL = 604.7759823386888
Saving Model at epoch 1...



0it [00:00, ?it/s]

Epoch 2:


51it [00:12,  6.96it/s]

Step 50,     Loss = 6.040302753448486,    PPL = 420.02017819654867


101it [00:19,  7.00it/s]

Step 100,     Loss = 5.861415863037109,    PPL = 351.2210730803826


151it [00:27,  6.97it/s]

Step 150,     Loss = 6.004726409912109,    PPL = 405.34007653668795


201it [00:34,  6.99it/s]

Step 200,     Loss = 6.035027027130127,    PPL = 417.8101016937199


251it [00:41,  6.98it/s]

Step 250,     Loss = 6.335144996643066,    PPL = 564.0511825840346


301it [00:48,  6.96it/s]

Step 300,     Loss = 6.31033992767334,    PPL = 550.2319560943956


351it [00:56,  6.97it/s]

Step 350,     Loss = 6.196667194366455,    PPL = 491.10953790348793


401it [01:03,  6.98it/s]

Step 400,     Loss = 6.465551853179932,    PPL = 642.6188968935706


451it [01:10,  6.99it/s]

Step 450,     Loss = 6.252696514129639,    PPL = 519.4115385477379


501it [01:17,  7.00it/s]

Step 500,     Loss = 6.205511093139648,    PPL = 495.47212364181775


551it [01:25,  7.00it/s]

Step 550,     Loss = 6.289081573486328,    PPL = 538.6583836925535


601it [01:32,  6.98it/s]

Step 600,     Loss = 6.146963596343994,    PPL = 467.29633013816726


651it [01:39,  7.00it/s]

Step 650,     Loss = 6.152180194854736,    PPL = 469.7403967927673


701it [01:46,  7.01it/s]

Step 700,     Loss = 6.137640476226807,    PPL = 462.9599161862955


751it [01:53,  6.98it/s]

Step 750,     Loss = 6.458955764770508,    PPL = 638.3940748040228


801it [02:01,  6.96it/s]

Step 800,     Loss = 6.203497409820557,    PPL = 494.4754035674607


851it [02:08,  6.97it/s]

Step 850,     Loss = 6.11789608001709,    PPL = 453.90870159817166


901it [02:15,  6.98it/s]

Step 900,     Loss = 6.153970241546631,    PPL = 470.58200707221505


951it [02:22,  6.98it/s]

Step 950,     Loss = 6.3213419914245605,    PPL = 556.3190671439401


1001it [02:30,  6.99it/s]

Step 1000,     Loss = 6.144042015075684,    PPL = 465.9330783284461


1051it [02:37,  6.98it/s]

Step 1050,     Loss = 6.399949550628662,    PPL = 601.8146759341554


1101it [02:44,  6.97it/s]

Step 1100,     Loss = 6.076189994812012,    PPL = 435.3672791830006


1151it [02:51,  6.98it/s]

Step 1150,     Loss = 6.167377948760986,    PPL = 476.9339199827905


1201it [02:59,  6.98it/s]

Step 1200,     Loss = 5.8546552658081055,    PPL = 348.85461720781996


1251it [03:06,  6.98it/s]

Step 1250,     Loss = 6.066214084625244,    PPL = 431.0456860599582


1301it [03:13,  6.97it/s]

Step 1300,     Loss = 6.283473491668701,    PPL = 535.6459981524448


1351it [03:20,  6.98it/s]

Step 1350,     Loss = 5.971504211425781,    PPL = 392.0950211958011


1401it [03:28,  7.00it/s]

Step 1400,     Loss = 5.9514031410217285,    PPL = 384.29217705781684


1451it [03:35,  6.98it/s]

Step 1450,     Loss = 6.069634914398193,    PPL = 432.52274491940716


1501it [03:42,  6.97it/s]

Step 1500,     Loss = 6.200411796569824,    PPL = 492.951995243206


1551it [03:49,  6.97it/s]

Step 1550,     Loss = 6.24473237991333,    PPL = 515.2913041690099


1601it [03:57,  6.98it/s]

Step 1600,     Loss = 6.1095290184021,    PPL = 450.12666386000643


1651it [04:04,  6.98it/s]

Step 1650,     Loss = 6.088451862335205,    PPL = 440.73855874447423


1701it [04:11,  7.00it/s]

Step 1700,     Loss = 6.118758201599121,    PPL = 454.3001948192205


1751it [04:18,  6.97it/s]

Step 1750,     Loss = 5.764611721038818,    PPL = 318.81523075415043


1801it [04:26,  6.97it/s]

Step 1800,     Loss = 5.96424674987793,    PPL = 389.25970769056806


1851it [04:33,  6.97it/s]

Step 1850,     Loss = 6.146312236785889,    PPL = 466.99205131528197


1901it [04:40,  6.98it/s]

Step 1900,     Loss = 5.935680866241455,    PPL = 378.29747848851844


1951it [04:47,  6.98it/s]

Step 1950,     Loss = 6.126908779144287,    PPL = 458.01813488281005


2001it [04:54,  7.00it/s]

Step 2000,     Loss = 5.7986578941345215,    PPL = 329.85656027562857


2051it [05:02,  6.98it/s]

Step 2050,     Loss = 6.079285621643066,    PPL = 436.7171020097474


2101it [05:09,  6.99it/s]

Step 2100,     Loss = 6.586333751678467,    PPL = 725.1175297225892


2151it [05:16,  6.97it/s]

Step 2150,     Loss = 6.358298301696777,    PPL = 577.2631923117835


2201it [05:23,  6.97it/s]

Step 2200,     Loss = 5.925858974456787,    PPL = 374.6000690937773


2251it [05:31,  7.00it/s]

Step 2250,     Loss = 6.045403957366943,    PPL = 422.16826102149184


2301it [05:38,  6.98it/s]

Step 2300,     Loss = 6.297473907470703,    PPL = 543.1980071257367


2351it [05:45,  7.00it/s]

Step 2350,     Loss = 5.947417259216309,    PPL = 382.76348248315844


2401it [05:52,  7.00it/s]

Step 2400,     Loss = 6.079482078552246,    PPL = 436.8029065299636


2451it [06:00,  6.99it/s]

Step 2450,     Loss = 6.113977432250977,    PPL = 452.1334737965339


2501it [06:07,  7.01it/s]

Step 2500,     Loss = 6.018853187561035,    PPL = 411.10686281975853


2551it [06:14,  6.97it/s]

Step 2550,     Loss = 6.18239164352417,    PPL = 484.14848338634425


2601it [06:21,  6.98it/s]

Step 2600,     Loss = 6.197517395019531,    PPL = 491.52725710075055


2651it [06:29,  7.01it/s]

Step 2650,     Loss = 6.102555274963379,    PPL = 446.9985161188281


2701it [06:36,  6.97it/s]

Step 2700,     Loss = 5.986333847045898,    PPL = 397.95297579926444


2751it [06:43,  6.92it/s]

Step 2750,     Loss = 6.202086925506592,    PPL = 493.7784454069117


2801it [06:50,  6.97it/s]

Step 2800,     Loss = 6.270110130310059,    PPL = 528.5355824601576


2851it [06:58,  6.96it/s]

Step 2850,     Loss = 5.8703227043151855,    PPL = 354.36331638973564


2901it [07:05,  6.99it/s]

Step 2900,     Loss = 6.144184589385986,    PPL = 465.99951315157296


2951it [07:12,  7.01it/s]

Step 2950,     Loss = 6.122626781463623,    PPL = 456.06109530128896


3001it [07:19,  6.98it/s]

Step 3000,     Loss = 6.182852745056152,    PPL = 484.3717764701759


3051it [07:27,  6.98it/s]

Step 3050,     Loss = 5.945941925048828,    PPL = 382.1991947981911


3101it [07:34,  6.99it/s]

Step 3100,     Loss = 5.840198040008545,    PPL = 343.84742947046067


3151it [07:41,  6.96it/s]

Step 3150,     Loss = 6.259191989898682,    PPL = 522.7963446728745


3201it [07:48,  6.97it/s]

Step 3200,     Loss = 6.049169540405273,    PPL = 423.76096751680086


3251it [07:56,  6.97it/s]

Step 3250,     Loss = 6.371578216552734,    PPL = 584.9803263835339


3301it [08:03,  6.97it/s]

Step 3300,     Loss = 6.221977710723877,    PPL = 503.6984173631198


3351it [08:10,  6.97it/s]

Step 3350,     Loss = 6.263393402099609,    PPL = 524.9974482478939


3401it [08:17,  6.97it/s]

Step 3400,     Loss = 5.951204776763916,    PPL = 384.2159547854688


3451it [08:25,  6.97it/s]

Step 3450,     Loss = 6.0598530769348145,    PPL = 428.3125032503659


3501it [08:32,  6.96it/s]

Step 3500,     Loss = 6.174788951873779,    PPL = 480.4816084761633


3551it [08:39,  6.98it/s]

Step 3550,     Loss = 6.092121601104736,    PPL = 442.3589254635573


3601it [08:46,  6.97it/s]

Step 3600,     Loss = 5.972982883453369,    PPL = 392.6752299993463


3651it [08:53,  6.98it/s]

Step 3650,     Loss = 6.229078769683838,    PPL = 507.28793914228186


3701it [09:01,  6.99it/s]

Step 3700,     Loss = 6.063490390777588,    PPL = 429.87324698413903


3751it [09:08,  6.99it/s]

Step 3750,     Loss = 6.1695876121521,    PPL = 477.9889486055441


3801it [09:15,  6.97it/s]

Step 3800,     Loss = 5.969175815582275,    PPL = 391.18313081105913


3851it [09:22,  6.98it/s]

Step 3850,     Loss = 5.887578010559082,    PPL = 360.53102374202206


3901it [09:30,  6.99it/s]

Step 3900,     Loss = 5.949126243591309,    PPL = 383.41817856738834


3951it [09:37,  6.98it/s]

Step 3950,     Loss = 6.0445451736450195,    PPL = 421.80586542300665


4001it [09:44,  6.97it/s]

Step 4000,     Loss = 6.032803535461426,    PPL = 416.8821364573058


4051it [09:51,  6.95it/s]

Step 4050,     Loss = 6.0842156410217285,    PPL = 438.87544174245073


4101it [09:59,  6.98it/s]

Step 4100,     Loss = 6.096580505371094,    PPL = 444.33576555867


4151it [10:06,  6.98it/s]

Step 4150,     Loss = 5.904168605804443,    PPL = 366.56234126258096


4201it [10:13,  6.89it/s]

Step 4200,     Loss = 6.306277275085449,    PPL = 548.0010895003967


4251it [10:20,  6.97it/s]

Step 4250,     Loss = 6.157284736633301,    PPL = 472.14433655893845


4301it [10:28,  6.98it/s]

Step 4300,     Loss = 6.271453380584717,    PPL = 529.2460150640383


4351it [10:35,  7.00it/s]

Step 4350,     Loss = 6.004273414611816,    PPL = 405.15650096957023


4401it [10:42,  6.98it/s]

Step 4400,     Loss = 6.011902809143066,    PPL = 408.25942141336975


4451it [10:49,  6.98it/s]

Step 4450,     Loss = 6.455084323883057,    PPL = 635.9273478552561


4501it [10:57,  6.97it/s]

Step 4500,     Loss = 6.191746234893799,    PPL = 488.6987443444781


4551it [11:04,  6.98it/s]

Step 4550,     Loss = 5.789455413818359,    PPL = 326.83498608467613


4601it [11:11,  6.98it/s]

Step 4600,     Loss = 5.924630165100098,    PPL = 374.1400397259401


4651it [11:18,  6.98it/s]

Step 4650,     Loss = 6.288751125335693,    PPL = 538.4804144322642


4701it [11:26,  6.98it/s]

Step 4700,     Loss = 6.268891334533691,    PPL = 527.8917979252246


4751it [11:33,  6.98it/s]

Step 4750,     Loss = 6.06541109085083,    PPL = 430.69969798928844


4801it [11:40,  6.98it/s]

Step 4800,     Loss = 6.12452507019043,    PPL = 456.9276531655268


4851it [11:47,  6.98it/s]

Step 4850,     Loss = 6.003173828125,    PPL = 404.71124120173346


4901it [11:55,  6.99it/s]

Step 4900,     Loss = 6.031009674072266,    PPL = 416.13497803856313


4951it [12:02,  6.97it/s]

Step 4950,     Loss = 6.076885223388672,    PPL = 435.67006419703284


5001it [12:09,  6.95it/s]

Step 5000,     Loss = 5.838854789733887,    PPL = 343.3858663831565


5051it [12:16,  6.99it/s]

Step 5050,     Loss = 6.068672180175781,    PPL = 432.10654084999896


5101it [12:24,  6.96it/s]

Step 5100,     Loss = 6.176449775695801,    PPL = 481.2802668093758


5151it [12:31,  6.97it/s]

Step 5150,     Loss = 6.184833526611328,    PPL = 485.3321619938156


5201it [12:38,  7.03it/s]

Step 5200,     Loss = 6.182526111602783,    PPL = 484.2135902799701


5251it [12:45,  6.97it/s]

Step 5250,     Loss = 6.204179286956787,    PPL = 494.8126900204734


5301it [12:52,  6.99it/s]

Step 5300,     Loss = 6.343985080718994,    PPL = 569.0595470268678


5351it [13:00,  6.97it/s]

Step 5350,     Loss = 6.226118564605713,    PPL = 505.7884832521857


5401it [13:07,  6.97it/s]

Step 5400,     Loss = 6.159731864929199,    PPL = 473.3011491818398


5451it [13:14,  6.97it/s]

Step 5450,     Loss = 6.173111915588379,    PPL = 479.67649867215306


5501it [13:21,  6.97it/s]

Step 5500,     Loss = 6.062749862670898,    PPL = 429.55503160069304


5551it [13:29,  6.98it/s]

Step 5550,     Loss = 6.146284103393555,    PPL = 466.978913429493


5601it [13:36,  6.97it/s]

Step 5600,     Loss = 6.129868030548096,    PPL = 459.3755331422038


5651it [13:43,  6.97it/s]

Step 5650,     Loss = 6.062395095825195,    PPL = 429.40266674567374


5701it [13:50,  6.98it/s]

Step 5700,     Loss = 6.179319858551025,    PPL = 482.6635651926278


5751it [13:58,  6.96it/s]

Step 5750,     Loss = 6.121130466461182,    PPL = 455.37919453875935


5801it [14:05,  6.94it/s]

Step 5800,     Loss = 6.269306659698486,    PPL = 528.1110902088415


5851it [14:12,  6.99it/s]

Step 5850,     Loss = 6.127080917358398,    PPL = 458.09698409286574


5901it [14:19,  6.99it/s]

Step 5900,     Loss = 6.302761077880859,    PPL = 546.0775932791859


5951it [14:27,  6.98it/s]

Step 5950,     Loss = 6.07985258102417,    PPL = 436.96477307070154


6001it [14:34,  6.99it/s]

Step 6000,     Loss = 5.716207027435303,    PPL = 303.75061745771796


6051it [14:41,  6.95it/s]

Step 6050,     Loss = 5.997089862823486,    PPL = 402.256467005522


6101it [14:48,  6.99it/s]

Step 6100,     Loss = 6.331379413604736,    PPL = 561.9311950178068


6151it [14:56,  6.97it/s]

Step 6150,     Loss = 5.960068702697754,    PPL = 387.6367550137879


6201it [15:03,  6.98it/s]

Step 6200,     Loss = 6.300008773803711,    PPL = 544.5766881139355


6251it [15:10,  6.94it/s]

Step 6250,     Loss = 6.149207592010498,    PPL = 468.3461184977164


6301it [15:17,  6.95it/s]

Step 6300,     Loss = 6.14424991607666,    PPL = 466.0299563519887


6351it [15:25,  7.00it/s]

Step 6350,     Loss = 5.93413782119751,    PPL = 377.71419856852646


6401it [15:32,  6.97it/s]

Step 6400,     Loss = 5.916637897491455,    PPL = 371.1617299831955


6451it [15:39,  6.97it/s]

Step 6450,     Loss = 6.068415641784668,    PPL = 431.99570315089017


6501it [15:46,  6.98it/s]

Step 6500,     Loss = 6.043552875518799,    PPL = 421.38751585119417


6551it [15:54,  6.98it/s]

Step 6550,     Loss = 6.193234920501709,    PPL = 489.4268049239809


6601it [16:01,  7.00it/s]

Step 6600,     Loss = 5.9644856452941895,    PPL = 389.3527111590788


6651it [16:08,  6.97it/s]

Step 6650,     Loss = 5.920114517211914,    PPL = 372.454363869372


6701it [16:15,  6.99it/s]

Step 6700,     Loss = 6.0482940673828125,    PPL = 423.3901385708223


6751it [16:23,  6.95it/s]

Step 6750,     Loss = 5.864984035491943,    PPL = 352.47652894840536


6801it [16:30,  7.00it/s]

Step 6800,     Loss = 5.834238052368164,    PPL = 341.80419790628366


6851it [16:37,  6.98it/s]

Step 6850,     Loss = 6.122842311859131,    PPL = 456.1594009230792


6901it [16:44,  6.98it/s]

Step 6900,     Loss = 5.939031600952148,    PPL = 379.56717900750306


6951it [16:51,  6.98it/s]

Step 6950,     Loss = 6.140151023864746,    PPL = 464.12365931537823


7001it [16:59,  6.96it/s]

Step 7000,     Loss = 6.321374893188477,    PPL = 556.3373713236675


7051it [17:06,  6.98it/s]

Step 7050,     Loss = 6.257468223571777,    PPL = 521.8959422027806


7101it [17:13,  6.96it/s]

Step 7100,     Loss = 6.177122116088867,    PPL = 481.60395977687244


7151it [17:20,  7.00it/s]

Step 7150,     Loss = 6.088525772094727,    PPL = 440.77113482919333


7201it [17:28,  6.98it/s]

Step 7200,     Loss = 6.152955532073975,    PPL = 470.1047452339818


7251it [17:35,  6.98it/s]

Step 7250,     Loss = 6.352706432342529,    PPL = 574.0442203804399


7301it [17:42,  6.99it/s]

Step 7300,     Loss = 6.096821308135986,    PPL = 444.44277572321187


7351it [17:49,  6.97it/s]

Step 7350,     Loss = 6.063817501068115,    PPL = 430.013885947822


7401it [17:57,  6.96it/s]

Step 7400,     Loss = 6.09116792678833,    PPL = 441.93726021532683


7451it [18:04,  6.98it/s]

Step 7450,     Loss = 6.110113620758057,    PPL = 450.3898859007941


7501it [18:11,  6.98it/s]

Step 7500,     Loss = 6.1753363609313965,    PPL = 480.74470046356026


7551it [18:18,  6.98it/s]

Step 7550,     Loss = 6.128255367279053,    PPL = 458.6353121172606


7601it [18:26,  6.97it/s]

Step 7600,     Loss = 6.026745319366455,    PPL = 414.3642091634155


7651it [18:33,  6.98it/s]

Step 7650,     Loss = 6.077780246734619,    PPL = 436.06017362810167


7701it [18:40,  6.97it/s]

Step 7700,     Loss = 6.077688694000244,    PPL = 436.0202529543053


7751it [18:47,  6.98it/s]

Step 7750,     Loss = 6.135539531707764,    PPL = 461.9882841177778


7801it [18:55,  6.98it/s]

Step 7800,     Loss = 6.259171009063721,    PPL = 522.7853760841143


7851it [19:02,  6.97it/s]

Step 7850,     Loss = 5.80611515045166,    PPL = 332.32557980726773


7901it [19:09,  6.99it/s]

Step 7900,     Loss = 6.0667243003845215,    PPL = 431.26566847643016


7951it [19:16,  6.98it/s]

Step 7950,     Loss = 6.223395824432373,    PPL = 504.41322571369795


8001it [19:24,  7.00it/s]

Step 8000,     Loss = 6.2171807289123535,    PPL = 501.28797127180104


8051it [19:31,  7.00it/s]

Step 8050,     Loss = 5.983755588531494,    PPL = 396.9282716947548


8101it [19:38,  6.97it/s]

Step 8100,     Loss = 6.2474141120910645,    PPL = 516.6750320047597


8151it [19:45,  6.96it/s]

Step 8150,     Loss = 5.961088180541992,    PPL = 388.0321436078475


8201it [19:52,  7.01it/s]

Step 8200,     Loss = 6.037622928619385,    PPL = 418.8961045272658


8251it [20:00,  6.99it/s]

Step 8250,     Loss = 6.024500370025635,    PPL = 413.43502588013547


8301it [20:07,  6.98it/s]

Step 8300,     Loss = 6.021275043487549,    PPL = 412.1037110360476


8351it [20:14,  6.96it/s]

Step 8350,     Loss = 6.141848564147949,    PPL = 464.9121970213571


8401it [20:21,  6.98it/s]

Step 8400,     Loss = 6.076767921447754,    PPL = 435.61896225014004


8451it [20:29,  6.97it/s]

Step 8450,     Loss = 6.2646965980529785,    PPL = 525.6820687984531


8501it [20:36,  6.97it/s]

Step 8500,     Loss = 6.2640252113342285,    PPL = 525.3292512909242


8551it [20:43,  6.99it/s]

Step 8550,     Loss = 6.163727760314941,    PPL = 475.1961947402795


8601it [20:50,  6.97it/s]

Step 8600,     Loss = 6.0333781242370605,    PPL = 417.1217410841493


8651it [20:58,  6.96it/s]

Step 8650,     Loss = 6.005306720733643,    PPL = 405.57536803400126


8701it [21:05,  6.98it/s]

Step 8700,     Loss = 6.022325038909912,    PPL = 412.53664529589446


8751it [21:12,  6.98it/s]

Step 8750,     Loss = 6.262485504150391,    PPL = 524.5210204479215


8801it [21:19,  6.98it/s]

Step 8800,     Loss = 6.299136638641357,    PPL = 544.1019506833276


8851it [21:27,  6.97it/s]

Step 8850,     Loss = 5.851760387420654,    PPL = 347.8461858634027


8901it [21:34,  6.98it/s]

Step 8900,     Loss = 6.313085556030273,    PPL = 551.7447644102892


8951it [21:41,  6.97it/s]

Step 8950,     Loss = 6.020211696624756,    PPL = 411.6657347493959


9001it [21:48,  7.00it/s]

Step 9000,     Loss = 6.15049409866333,    PPL = 468.9490366409355


9051it [21:56,  6.98it/s]

Step 9050,     Loss = 6.149742126464844,    PPL = 468.5965325561211


9101it [22:03,  6.97it/s]

Step 9100,     Loss = 5.8099493980407715,    PPL = 333.602244320291


9151it [22:10,  6.98it/s]

Step 9150,     Loss = 5.902949333190918,    PPL = 366.1156741985246


9201it [22:17,  6.98it/s]

Step 9200,     Loss = 6.0314860343933105,    PPL = 416.3332554522964


9251it [22:25,  6.94it/s]

Step 9250,     Loss = 6.15889310836792,    PPL = 472.90433117761864


9301it [22:32,  7.00it/s]

Step 9300,     Loss = 6.14951229095459,    PPL = 468.48884480866604


9351it [22:39,  6.99it/s]

Step 9350,     Loss = 5.956029891967773,    PPL = 386.07432084012333


9401it [22:46,  6.97it/s]

Step 9400,     Loss = 6.056952476501465,    PPL = 427.0719398769462


9451it [22:54,  6.98it/s]

Step 9450,     Loss = 6.172102928161621,    PPL = 479.1927552026607


9501it [23:01,  6.98it/s]

Step 9500,     Loss = 6.147903919219971,    PPL = 467.73594622539514


9551it [23:08,  6.98it/s]

Step 9550,     Loss = 5.972831726074219,    PPL = 392.6158787265268


9601it [23:15,  6.97it/s]

Step 9600,     Loss = 6.037541389465332,    PPL = 418.8619494857722


9651it [23:22,  6.98it/s]

Step 9650,     Loss = 6.015990734100342,    PPL = 409.93177118248354


9701it [23:30,  6.96it/s]

Step 9700,     Loss = 6.254525661468506,    PPL = 520.3624882296505


9751it [23:37,  6.99it/s]

Step 9750,     Loss = 5.927306652069092,    PPL = 375.14276195474474


9801it [23:44,  6.97it/s]

Step 9800,     Loss = 6.073502540588379,    PPL = 434.1988203428087


9851it [23:51,  6.95it/s]

Step 9850,     Loss = 5.74490213394165,    PPL = 312.59303408492076


9901it [23:59,  6.99it/s]

Step 9900,     Loss = 5.912638187408447,    PPL = 369.6801555785433


9951it [24:06,  7.00it/s]

Step 9950,     Loss = 6.363018035888672,    PPL = 579.9941607913536


10001it [24:13,  6.98it/s]

Step 10000,     Loss = 5.85742712020874,    PPL = 349.82293280866065


10051it [24:20,  6.97it/s]

Step 10050,     Loss = 6.023946285247803,    PPL = 413.2060112782843


10101it [24:28,  6.97it/s]

Step 10100,     Loss = 6.156038284301758,    PPL = 471.556197769439


10151it [24:35,  6.96it/s]

Step 10150,     Loss = 6.131557464599609,    PPL = 460.15227375159685


10201it [24:42,  6.99it/s]

Step 10200,     Loss = 6.2007036209106445,    PPL = 493.0958716265682


10251it [24:49,  6.97it/s]

Step 10250,     Loss = 6.037864685058594,    PPL = 418.99738760031903


10301it [24:57,  6.97it/s]

Step 10300,     Loss = 6.1864471435546875,    PPL = 486.11593437770233


10351it [25:04,  6.98it/s]

Step 10350,     Loss = 6.126670837402344,    PPL = 457.90916621456586


10401it [25:11,  6.98it/s]

Step 10400,     Loss = 5.828373908996582,    PPL = 339.8056746254057


10451it [25:18,  7.00it/s]

Step 10450,     Loss = 5.892845630645752,    PPL = 362.43517497419055


10501it [25:26,  6.99it/s]

Step 10500,     Loss = 6.087882995605469,    PPL = 440.4879085419324


10551it [25:33,  6.97it/s]

Step 10550,     Loss = 6.234107494354248,    PPL = 509.84537544898865


10601it [25:40,  7.00it/s]

Step 10600,     Loss = 6.307217597961426,    PPL = 548.5166298101003


10651it [25:47,  6.98it/s]

Step 10650,     Loss = 6.025480270385742,    PPL = 413.84034956685696


10701it [25:55,  6.96it/s]

Step 10700,     Loss = 5.997239589691162,    PPL = 402.31670011547396


10751it [26:02,  6.98it/s]

Step 10750,     Loss = 6.083913803100586,    PPL = 438.742992481585


10801it [26:09,  6.99it/s]

Step 10800,     Loss = 6.126553058624268,    PPL = 457.855237408396


10851it [26:16,  6.99it/s]

Step 10850,     Loss = 6.309416770935059,    PPL = 549.7242401432673


10901it [26:23,  6.96it/s]

Step 10900,     Loss = 6.025206565856934,    PPL = 413.72709508881553


10951it [26:31,  6.94it/s]

Step 10950,     Loss = 6.05124568939209,    PPL = 424.6416723410287


11001it [26:38,  7.00it/s]

Step 11000,     Loss = 6.109429836273193,    PPL = 450.08202155310346


11051it [26:45,  7.00it/s]

Step 11050,     Loss = 5.969268321990967,    PPL = 391.21931943144494


11101it [26:52,  6.98it/s]

Step 11100,     Loss = 5.98544979095459,    PPL = 397.60131851224133


11151it [27:00,  6.94it/s]

Step 11150,     Loss = 5.999228477478027,    PPL = 403.11765913156023


11201it [27:07,  6.99it/s]

Step 11200,     Loss = 5.900979518890381,    PPL = 365.39520413691486


11251it [27:14,  6.97it/s]

Step 11250,     Loss = 6.192803859710693,    PPL = 489.215877682804


11301it [27:21,  6.99it/s]

Step 11300,     Loss = 6.122427940368652,    PPL = 455.9704206289501


11351it [27:29,  6.97it/s]

Step 11350,     Loss = 5.92332649230957,    PPL = 373.65260133542193


11401it [27:36,  6.97it/s]

Step 11400,     Loss = 6.1332011222839355,    PPL = 460.90922848912794


11451it [27:43,  6.97it/s]

Step 11450,     Loss = 6.1587605476379395,    PPL = 472.84164678910275


11501it [27:50,  7.00it/s]

Step 11500,     Loss = 6.010354518890381,    PPL = 407.62780641847417


11551it [27:58,  6.97it/s]

Step 11550,     Loss = 6.083643913269043,    PPL = 438.62459618695016


11601it [28:05,  6.97it/s]

Step 11600,     Loss = 6.041340351104736,    PPL = 420.45621632597346


11651it [28:12,  6.98it/s]

Step 11650,     Loss = 6.055167198181152,    PPL = 426.31017778247156


11701it [28:19,  6.98it/s]

Step 11700,     Loss = 6.2734456062316895,    PPL = 530.301443525552


11751it [28:27,  6.97it/s]

Step 11750,     Loss = 6.304077625274658,    PPL = 546.7970037765245


11801it [28:34,  6.97it/s]

Step 11800,     Loss = 6.1313910484313965,    PPL = 460.07570334485825


11851it [28:41,  7.02it/s]

Step 11850,     Loss = 5.945014476776123,    PPL = 381.84488914062797


11901it [28:48,  6.95it/s]

Step 11900,     Loss = 6.124602317810059,    PPL = 456.9629511023997


11951it [28:56,  6.97it/s]

Step 11950,     Loss = 5.935665607452393,    PPL = 378.29170617113067


12001it [29:03,  6.99it/s]

Step 12000,     Loss = 5.863029956817627,    PPL = 351.78843459419494


12051it [29:10,  6.96it/s]

Step 12050,     Loss = 6.072554111480713,    PPL = 433.7872087661358


12101it [29:17,  6.98it/s]

Step 12100,     Loss = 6.062987804412842,    PPL = 429.6572528340396


12151it [29:25,  6.96it/s]

Step 12150,     Loss = 6.3822245597839355,    PPL = 591.2414978730005


12201it [29:32,  6.96it/s]

Step 12200,     Loss = 5.981647968292236,    PPL = 396.09257860698017


12251it [29:39,  6.98it/s]

Step 12250,     Loss = 5.954019546508789,    PPL = 385.2989577173935


12301it [29:46,  7.00it/s]

Step 12300,     Loss = 6.060135841369629,    PPL = 428.433631917904


12351it [29:54,  6.97it/s]

Step 12350,     Loss = 6.406006813049316,    PPL = 605.4710880992869


12401it [30:01,  6.98it/s]

Step 12400,     Loss = 5.982449054718018,    PPL = 396.4100101231186


12451it [30:08,  6.94it/s]

Step 12450,     Loss = 6.062747955322266,    PPL = 429.5542122902722


12501it [30:15,  6.95it/s]

Step 12500,     Loss = 5.804255962371826,    PPL = 331.7082980499157


12551it [30:22,  6.97it/s]

Step 12550,     Loss = 6.040587902069092,    PPL = 420.13996344847595


12601it [30:30,  6.98it/s]

Step 12600,     Loss = 6.1467437744140625,    PPL = 467.19361944647113


12651it [30:37,  6.96it/s]

Step 12650,     Loss = 5.943403720855713,    PPL = 381.2303253138032


12701it [30:44,  6.98it/s]

Step 12700,     Loss = 5.8368048667907715,    PPL = 342.6826728098816


12751it [30:51,  6.98it/s]

Step 12750,     Loss = 5.868143558502197,    PPL = 353.591947820091


12801it [30:59,  7.00it/s]

Step 12800,     Loss = 6.116808891296387,    PPL = 453.4154853357248


12851it [31:06,  6.99it/s]

Step 12850,     Loss = 6.221518039703369,    PPL = 503.4669350045243


12901it [31:13,  7.00it/s]

Step 12900,     Loss = 5.987024307250977,    PPL = 398.2278413734968


12951it [31:20,  6.94it/s]

Step 12950,     Loss = 5.938148498535156,    PPL = 379.2321302772569


13001it [31:28,  6.92it/s]

Step 13000,     Loss = 6.060873031616211,    PPL = 428.7495854572867


13051it [31:35,  6.97it/s]

Step 13050,     Loss = 5.84340763092041,    PPL = 344.95281201912616


13101it [31:42,  6.98it/s]

Step 13100,     Loss = 5.976329326629639,    PPL = 393.99149652045594


13151it [31:49,  6.99it/s]

Step 13150,     Loss = 5.957216262817383,    PPL = 386.5326199627568


13201it [31:57,  6.96it/s]

Step 13200,     Loss = 6.142496109008789,    PPL = 465.21334601849975


13251it [32:04,  6.98it/s]

Step 13250,     Loss = 6.007458209991455,    PPL = 406.448898440245


13301it [32:11,  6.97it/s]

Step 13300,     Loss = 6.088058948516846,    PPL = 440.5654204908933


13351it [32:18,  6.99it/s]

Step 13350,     Loss = 6.041312217712402,    PPL = 420.44438763267175


13401it [32:26,  6.96it/s]

Step 13400,     Loss = 5.984888553619385,    PPL = 397.37823241574444


13451it [32:33,  7.00it/s]

Step 13450,     Loss = 5.77408504486084,    PPL = 321.8498218117282


13501it [32:40,  6.98it/s]

Step 13500,     Loss = 5.911238670349121,    PPL = 369.1631437621323


13551it [32:47,  7.00it/s]

Step 13550,     Loss = 6.352649688720703,    PPL = 574.0116479564346


13601it [32:55,  7.00it/s]

Step 13600,     Loss = 5.930684566497803,    PPL = 376.41210476190423


13651it [33:02,  7.00it/s]

Step 13650,     Loss = 5.897191047668457,    PPL = 364.01353378313644


13701it [33:09,  6.97it/s]

Step 13700,     Loss = 6.034422874450684,    PPL = 417.5577568363695


13751it [33:16,  6.96it/s]

Step 13750,     Loss = 5.976966857910156,    PPL = 394.2427585089239


13801it [33:23,  6.98it/s]

Step 13800,     Loss = 5.944437026977539,    PPL = 381.62445653680993


13851it [33:31,  6.99it/s]

Step 13850,     Loss = 6.001164436340332,    PPL = 403.898834253775


13901it [33:38,  6.98it/s]

Step 13900,     Loss = 6.02344274520874,    PPL = 412.9979978831561


13951it [33:45,  6.98it/s]

Step 13950,     Loss = 5.849557876586914,    PPL = 347.0808939617881


14001it [33:52,  6.99it/s]

Step 14000,     Loss = 5.741601467132568,    PPL = 311.5629695176918


14051it [34:00,  6.98it/s]

Step 14050,     Loss = 5.732971668243408,    PPL = 308.8858120295408


14101it [34:07,  6.99it/s]

Step 14100,     Loss = 5.886497974395752,    PPL = 360.1418473984712


14151it [34:14,  7.01it/s]

Step 14150,     Loss = 6.091097354888916,    PPL = 441.9060729639363


14201it [34:21,  6.98it/s]

Step 14200,     Loss = 5.781048774719238,    PPL = 324.09891898950264


14251it [34:29,  6.97it/s]

Step 14250,     Loss = 5.9122843742370605,    PPL = 369.5493810065466


14301it [34:36,  6.98it/s]

Step 14300,     Loss = 5.81813907623291,    PPL = 336.345557438251


14351it [34:43,  6.96it/s]

Step 14350,     Loss = 5.7220778465271,    PPL = 305.5391272535893


14401it [34:50,  6.98it/s]

Step 14400,     Loss = 5.887814521789551,    PPL = 360.61630346247773


14451it [34:58,  6.97it/s]

Step 14450,     Loss = 5.926600933074951,    PPL = 374.8781099780662


14501it [35:05,  6.98it/s]

Step 14500,     Loss = 6.114755153656006,    PPL = 452.4852444490881


14551it [35:12,  6.97it/s]

Step 14550,     Loss = 5.803289890289307,    PPL = 331.38799866468787


14601it [35:19,  6.98it/s]

Step 14600,     Loss = 5.981320381164551,    PPL = 395.9628450275423


14651it [35:27,  6.99it/s]

Step 14650,     Loss = 6.096668720245361,    PPL = 444.37496431129244


14701it [35:34,  6.96it/s]

Step 14700,     Loss = 6.349879741668701,    PPL = 572.4238661353521


14751it [35:41,  6.99it/s]

Step 14750,     Loss = 6.182888984680176,    PPL = 484.3893302393118


14801it [35:48,  6.99it/s]

Step 14800,     Loss = 5.9988813400268555,    PPL = 402.9777461807645


14851it [35:56,  6.98it/s]

Step 14850,     Loss = 6.0138258934021,    PPL = 409.04529408778035


14901it [36:03,  6.96it/s]

Step 14900,     Loss = 5.9715142250061035,    PPL = 392.09894749044787


14951it [36:10,  6.98it/s]

Step 14950,     Loss = 5.800074577331543,    PPL = 330.3241936879858


15001it [36:17,  7.01it/s]

Step 15000,     Loss = 6.012316703796387,    PPL = 408.42843277909793


15051it [36:24,  6.98it/s]

Step 15050,     Loss = 5.911056041717529,    PPL = 369.09573015836736


15101it [36:32,  6.96it/s]

Step 15100,     Loss = 5.869649887084961,    PPL = 354.1249748338529


15151it [36:39,  6.98it/s]

Step 15150,     Loss = 5.8442230224609375,    PPL = 345.2341983283455


15201it [36:46,  6.97it/s]

Step 15200,     Loss = 5.669045925140381,    PPL = 289.757951695329


15251it [36:53,  6.96it/s]

Step 15250,     Loss = 6.178398132324219,    PPL = 482.218886493358


15301it [37:01,  6.97it/s]

Step 15300,     Loss = 5.8993401527404785,    PPL = 364.79667834342484


15351it [37:08,  6.96it/s]

Step 15350,     Loss = 6.175987720489502,    PPL = 481.05794012396433


15401it [37:15,  6.97it/s]

Step 15400,     Loss = 6.004919528961182,    PPL = 405.41836298587856


15451it [37:22,  6.98it/s]

Step 15450,     Loss = 5.761422157287598,    PPL = 317.7999692321888


15501it [37:30,  6.97it/s]

Step 15500,     Loss = 5.967026710510254,    PPL = 390.3433398832402


15551it [37:37,  7.01it/s]

Step 15550,     Loss = 6.025149822235107,    PPL = 413.70361938104725


15601it [37:44,  7.00it/s]

Step 15600,     Loss = 6.286009311676025,    PPL = 537.0060236525494


15651it [37:51,  6.99it/s]

Step 15650,     Loss = 5.911012172698975,    PPL = 369.07953864608805


15701it [37:59,  6.98it/s]

Step 15700,     Loss = 5.881327152252197,    PPL = 358.2844242999202


15751it [38:06,  6.97it/s]

Step 15750,     Loss = 5.760980606079102,    PPL = 317.65967524748294


15801it [38:13,  6.99it/s]

Step 15800,     Loss = 5.927779674530029,    PPL = 375.3202548829688


15851it [38:20,  6.98it/s]

Step 15850,     Loss = 6.0142598152160645,    PPL = 409.22282627854423


15901it [38:28,  6.96it/s]

Step 15900,     Loss = 5.877646446228027,    PPL = 356.96810863261226


15951it [38:35,  6.99it/s]

Step 15950,     Loss = 5.808660507202148,    PPL = 333.17254442146015


16001it [38:42,  6.97it/s]

Step 16000,     Loss = 6.059257984161377,    PPL = 428.05769340017866


16051it [38:49,  6.96it/s]

Step 16050,     Loss = 6.145450115203857,    PPL = 466.5896208861108


16101it [38:57,  6.94it/s]

Step 16100,     Loss = 6.0571136474609375,    PPL = 427.1407770183851


16151it [39:04,  6.96it/s]

Step 16150,     Loss = 6.007683277130127,    PPL = 406.54038702598416


16201it [39:11,  6.98it/s]

Step 16200,     Loss = 6.147611141204834,    PPL = 467.5990234684158


16251it [39:18,  6.96it/s]

Step 16250,     Loss = 5.7443342208862305,    PPL = 312.4155588198607


16301it [39:26,  7.00it/s]

Step 16300,     Loss = 6.296534538269043,    PPL = 542.6879832353113


16351it [39:33,  6.99it/s]

Step 16350,     Loss = 5.960250377655029,    PPL = 387.70718530220944


16401it [39:40,  6.98it/s]

Step 16400,     Loss = 6.149001121520996,    PPL = 468.2494288274991


16451it [39:47,  6.98it/s]

Step 16450,     Loss = 5.7913594245910645,    PPL = 327.4578762256695


16501it [39:55,  6.97it/s]

Step 16500,     Loss = 5.76974630355835,    PPL = 320.45642367950313


16551it [40:02,  6.98it/s]

Step 16550,     Loss = 6.0692830085754395,    PPL = 432.37056442516797


16601it [40:09,  6.99it/s]

Step 16600,     Loss = 5.99635648727417,    PPL = 401.9615700963744


16651it [40:16,  6.97it/s]

Step 16650,     Loss = 6.032737731933594,    PPL = 416.85470504458857


16701it [40:23,  6.97it/s]

Step 16700,     Loss = 6.032007217407227,    PPL = 416.5502978276532


16751it [40:31,  6.98it/s]

Step 16750,     Loss = 5.874884128570557,    PPL = 355.98340997436486


16801it [40:38,  6.89it/s]

Step 16800,     Loss = 6.07335901260376,    PPL = 434.13650513329605
Saving Model at epoch 2...



