# Config

In [196]:
# Execute Config.py inside this notebook's namespace.
# Later cells use `config`, `torch`, `nn` and `onmt` without importing them here, so those names
# are presumably provided by this script (or by earlier kernel state) — TODO confirm.
%run Config.py

# Data

In [197]:
# Run OpenNMT-py preprocessing on the Cornell source/target line files with default options,
# writing under the ../data/cornell_raw prefix. `!!` runs the command in a shell and returns
# its output as a list of lines (that list is the cell output).
# NOTE(review): the recorded output only shows the script asking to back up the existing
# ../data/cornell_raw.train*.pt files — confirm whether anything is rebuilt in that case.
!!python ../OpenNMT-py/preprocess.py --train_src "../data/cornell movie-dialogs corpus/src_movie_lines.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines.txt" --save_data ../data/cornell_raw

['Please backup existing pt files: ../data/cornell_raw.train*.pt, to avoid overwriting them!']

In [198]:
# Same preprocessing as above, written under the ../data/cornell_raw_min_30_10 prefix, with a
# minimum word frequency of 30 and a sequence-length limit of 10 on both source and target
# (meaning of the flags per OpenNMT-py's preprocess options — confirm against the checked-out version).
# This is the prefix the following cells load their vocab and training shard from.
# NOTE(review): the recorded output only shows the "backup existing pt files" message, so the
# files used below may come from an earlier run — confirm.
!!python ../OpenNMT-py/preprocess.py --train_src "../data/cornell movie-dialogs corpus/src_movie_lines.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines.txt" --save_data ../data/cornell_raw_min_30_10 --src_words_min_frequency 30 --tgt_words_min_frequency 30 --src_seq_length 10 --tgt_seq_length 10

['Please backup existing pt files: ../data/cornell_raw_min_30_10.train*.pt, to avoid overwriting them!']

In [199]:
# Load the field/vocabulary object saved next to the preprocessed data; the path matches the
# --save_data prefix of the second preprocessing command. The cells below index it by "src"/"tgt".
# NOTE(review): torch.load unpickles the file, so only point it at files produced locally.
vocab_fields = torch.load("../data/cornell_raw_min_30_10.vocab.pt")

In [200]:
# Unwrap the underlying text field for each side of the parallel corpus.
src_text_field, tgt_text_field = (
    vocab_fields[side].base_field for side in ("src", "tgt")
)

# Vocabulary objects; `stoi` maps token strings to numerical identifiers.
src_vocab, tgt_vocab = src_text_field.vocab, tgt_text_field.vocab

# Index of the padding token on each side.
# Values noted by the author for the source side: src_text_field.pad_token is '<blank>', and the
# first vocabulary entries (presumably src_vocab.itos[:10]) are
# ['<unk>', '<blank>', 'I', 'you', 'the', 'to', 'a', 'of', 'and', 'You'].
src_padding = src_vocab.stoi[src_text_field.pad_token]
tgt_padding = tgt_vocab.stoi[tgt_text_field.pad_token]

In [201]:
# Publish vocabulary sizes and padding indices on the shared config object
# (same attributes, same assignment order as before).
config.src_vocab_size, config.tgt_vocab_size = len(src_vocab), len(tgt_vocab)
config.src_padding, config.tgt_padding = src_padding, tgt_padding

In [202]:
# Numerical ids of the target-side special tokens: unknown, sequence start, sequence end.
config.tgt_unk, config.tgt_bos, config.tgt_eos = (
    tgt_vocab.stoi[special_token]
    for special_token in (
        tgt_text_field.unk_token,
        tgt_text_field.init_token,
        tgt_text_field.eos_token,
    )
)

In [203]:
# Keep the vocabulary objects themselves on the config as well.
config.src_vocab, config.tgt_vocab = src_vocab, tgt_vocab

# seq2seq-DQN

In [204]:
# Execute modules/NoisyLinear.py in the notebook namespace so its definitions are available
# to the modules run in the following cells.
%run modules/NoisyLinear.py

In [205]:
# Execute modules/DQN.py in the notebook namespace; presumably the source of the `DQN` name
# passed to Model(...) below — TODO confirm.
%run modules/DQN.py

In [206]:
# Execute modules/Model.py in the notebook namespace; presumably the source of the `Model` name
# used in the next cell — TODO confirm.
%run modules/Model.py

In [207]:
# Build the model wrapper from the config and the DQN class. Later cells read
# model.current_model, model.replay_memory and model.sample_buffer from it.
model = Model(config, DQN)

In [208]:
# Bare expression: the cell output is the repr of model.current_model (the module tree below).
model.current_model

DQN(
  (encoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(1462, 100, padding_idx=1)
      )
    )
  )
  (encoder): RNNEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(1462, 100, padding_idx=1)
        )
      )
    )
    (rnn): LSTM(100, 250, bidirectional=True)
  )
  (decoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(1472, 100, padding_idx=1)
      )
    )
  )
  (decoder): InputFeedRNNDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(1472, 100, padding_idx=1)
        )
      )
    )
    (dropout): Dropout(p=0.0)
    (rnn): StackedLSTM(
      (dropout): Dropout(p=0.0)
      (layers): ModuleList(
        (0): LSTMCell(600, 500)
      )
    )
    (attn): GlobalAttention(
      (linear

In [209]:
# Execute modules/MSELoss.py in the notebook namespace; presumably the source of the `MSELoss`
# wrapper used in the next cell (distinct from nn.MSELoss) — TODO confirm.
%run modules/MSELoss.py

In [210]:
# Loss object shared by training and validation: the project-level MSELoss wrapper around
# torch's nn.MSELoss. reduction="none" makes the torch criterion return element-wise losses
# instead of a reduced scalar. The second argument is the current model's generator.
loss = MSELoss(nn.MSELoss(reduction="none"), model.current_model.generator)

In [211]:
# Execute modules/Reward.py in the notebook namespace; presumably the source of the `Reward`
# class used in the next cell — TODO confirm.
%run modules/Reward.py

In [212]:
# Reward setup: a single reward, BLEU, with weight 1. The two lists are parallel
# (one weight per reward name) and are read by Reward from the config.
config.rewards, config.rewards_weights = ["BLEU"], [1]

reward = Reward(config)

In [213]:
# Plain SGD over the current model's parameters, wrapped in OpenNMT's Optimizer with
# max_grad_norm=2. The same learning rate is handed to both the torch optimizer and the wrapper.
lr = 1
torch_optimizer = torch.optim.SGD(
    model.current_model.parameters(),
    lr=lr,
)
optim = onmt.utils.optimizers.Optimizer(
    torch_optimizer,
    learning_rate=lr,
    max_grad_norm=2,
)

In [214]:
#report_manager = onmt.utils.ReportMgr(report_every=1, start_time=None, tensorboard_writer=None)

In [215]:
from itertools import chain, islice

# Lazy iterator over the preprocessed training shard. batch_size=1, so every item it yields
# is a batch holding a single example.
train_data_file = "../data/cornell_raw_min_30_10.train.0.pt"
train_iter = onmt.inputters.inputter.DatasetLazyIter(
    dataset_paths=[train_data_file],
    fields=vocab_fields,
    batch_size=1,
    batch_size_multiple=1,
    batch_size_fn=None,
    device="cpu",
    is_train=True,
    repeat=False,
    pool_factor=8192,
)

# Preload Experience Replay Buffer

# Only runs when the replay memory is still empty, so re-executing the cell does not
# preload the same examples twice.
if len(model.replay_memory) == 0:
    # Read just the first PRELOADING_SIZE batches. The previous list(train_iter) materialised
    # the whole shard (101736 examples per the log) only to slice off this prefix.
    # NOTE: `data` therefore now holds only the preloaded prefix, not the full dataset.
    data = list(islice(train_iter, config.PRELOADING_SIZE))
    for example in data:
        # example.src is indexed at [0] (presumably a (tokens, lengths) pair — TODO confirm);
        # squeeze(1) drops dimension 1, presumably the size-1 batch dimension.
        # The third preload argument is 1 for the replay memory and None for the sample buffer
        # (presumably an initial reward/priority — verify against the buffer implementations).
        model.replay_memory.preload(example.src[0].squeeze(1), example.tgt.squeeze(1), 1)
        model.sample_buffer.preload(example.src[0].squeeze(1), example.tgt.squeeze(1), None)

INFO:root:Loading dataset from ../data/cornell_raw_min_30_10.train.0.pt
INFO:root:number of examples: 101736


In [216]:
# Execute modules/QLearning.py in the notebook namespace; presumably the source of the
# `QLearning` trainer class used in the next cell — TODO confirm.
%run modules/QLearning.py

In [217]:
# Assemble the Q-learning trainer from the pieces built above. The same loss object is
# passed for both training and validation.
trainer = QLearning(
    config,
    model,
    reward=reward,
    train_loss=loss,
    valid_loss=loss,
    optim=optim,
    gpu_verbose_level=100,
)
#shard_size = 0

In [218]:
#for i in model.target_model.parameters():
#    print(i.abs().sum())

In [219]:
#for i, t in enumerate(model.replay_memory._storage):
#    if t[1].size(0) > 10:
#        print(i, t[1].size(0), t[1])
    
    #if t[1][-1].item() == 96:
    #    print(i, t[1][-1])
    #    print(t[1].size())
    #print((t[1] == 0).sum())
    #print(t[1].size(0))
    
#for x in model.replay_memory._storage:
#    #print([t for t in x[1]])
#    print(' '.join([tgt_vocab.itos[t.item()] for t in x[1]]))

#for i, x in enumerate(data[:config.PRELOADING_SIZE]):
#    print(' '.join([src_vocab.itos[token] for token in x.src[0].squeeze().tolist()]))

In [220]:
# Run the training loop for 400 steps, validating every 200 steps (as echoed by the first
# log line below); whatever train() returns is kept in `result`.
# NOTE(review): the call logs several lines per step, which is why the saved output is so long.
result = trainer.train(train_steps=400, valid_steps=200)

INFO:root:Start training loop and validate every 200 steps...
INFO:root:Step 1


BETA 0.40006


INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  8,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,  3,
         3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(0.8379, grad_fn=<DivBackward0>)
INFO:root:Step 2


BETA 0.40012000000000003


INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  8,  7,  6,  6,  6,  5,  5,  5,  4,  4,  4,  3,
         3,  3,  3,  3,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(18.2504, grad_fn=<DivBackward0>)
INFO:root:Step 3


BETA 0.40018000000000004


INFO:root:Batch Length: tensor([10, 10, 10,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  3,  3,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(15.8332, grad_fn=<DivBackward0>)
INFO:root:Step 4


BETA 0.40024000000000004


INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,
         5,  4,  4,  4,  3,  3,  3,  3,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(17.8273, grad_fn=<DivBackward0>)
INFO:root:Step 5


BETA 0.40030000000000004


INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  8,  8,  8,  8,  8,  8,  7,  7,  6,  6,  5,  5,
         5,  5,  4,  4,  4,  4,  4,  3,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(1.1217, grad_fn=<DivBackward0>)
INFO:root:Step 6


BETA 0.40036000000000005


INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,
         5,  5,  5,  4,  4,  3,  3,  3,  3,  3,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(15.2201, grad_fn=<DivBackward0>)
INFO:root:Step 7


BETA 0.40042


INFO:root:Batch Length: tensor([10,  9,  9,  9,  9,  9,  8,  8,  7,  7,  7,  6,  6,  5,  5,  5,  5,  5,
         5,  5,  5,  4,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(6.1455, grad_fn=<DivBackward0>)
INFO:root:Step 8


BETA 0.40048


INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  8,  7,  6,  6,  5,  5,  5,  5,  5,  4,  4,  4,
         4,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(48.4273, grad_fn=<DivBackward0>)
INFO:root:Step 9


BETA 0.40054


INFO:root:Batch Length: tensor([9, 9, 9, 9, 8, 8, 7, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4,
        3, 3, 3, 3, 3, 2, 2, 1], dtype=torch.int16)
INFO:root:tensor(6.3792, grad_fn=<DivBackward0>)
INFO:root:Step 10


BETA 0.4006


INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  9,  9,  9,  8,  7,  7,  7,  6,  6,  5,  5,  4,
         4,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(2.6179, grad_fn=<DivBackward0>)
INFO:root:Step 11


BETA 0.40066


INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  8,  8,  8,  8,  7,  7,  6,  6,  6,  5,  5,  5,  4,
         4,  4,  4,  4,  3,  3,  3,  3,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(7.5849, grad_fn=<DivBackward0>)
INFO:root:Step 12


BETA 0.40072


INFO:root:Batch Length: tensor([10, 10, 10, 10, 10, 10, 10,  9,  9,  8,  8,  5,  5,  5,  5,  5,  5,  4,
         4,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(1.6294, grad_fn=<DivBackward0>)
INFO:root:Step 13


BETA 0.40078


INFO:root:Batch Length: tensor([10,  9,  9,  8,  7,  7,  6,  5,  5,  5,  4,  4,  4,  4,  4,  4,  4,  3,
         3,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(20.0777, grad_fn=<DivBackward0>)
INFO:root:Step 14


BETA 0.40084000000000003


INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,  4,
         4,  4,  4,  4,  3,  3,  3,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(4.4806, grad_fn=<DivBackward0>)
INFO:root:Step 15


BETA 0.40090000000000003


INFO:root:Batch Length: tensor([10, 10, 10, 10, 10, 10,  9,  9,  9,  8,  6,  6,  6,  5,  5,  5,  5,  4,
         4,  4,  4,  3,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(39.1105, grad_fn=<DivBackward0>)
INFO:root:Step 16


BETA 0.40096000000000004


INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  8,  7,  7,  6,  6,  5,  5,  5,  5,  4,  4,  4,  4,
         4,  4,  4,  3,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(0.8666, grad_fn=<DivBackward0>)
INFO:root:Step 17


BETA 0.40102000000000004


INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  9,  8,  8,  8,  8,  8,  7,  7,  6,  5,  5,  5,
         5,  4,  4,  4,  4,  4,  4,  4,  3,  3,  3,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(50.1139, grad_fn=<DivBackward0>)
INFO:root:Step 18


BETA 0.40108000000000005


INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  9,  8,  7,  7,  6,  5,  5,  5,  5,  5,  5,  5,
         5,  4,  3,  3,  3,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(0.7454, grad_fn=<DivBackward0>)
INFO:root:Step 19


BETA 0.40114


INFO:root:Batch Length: tensor([10,  9,  8,  8,  8,  7,  7,  7,  7,  7,  6,  5,  5,  5,  4,  4,  4,  4,
         4,  4,  4,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(4.0065, grad_fn=<DivBackward0>)
INFO:root:Step 20


BETA 0.4012


INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  8,  8,  7,  7,  7,  7,  5,  5,  5,  4,  3,  3,
         3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(2.4332, grad_fn=<DivBackward0>)
INFO:root:Target Model Updated
INFO:root:Step 21


BETA 0.40126


INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,  4,
         4,  4,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(9.3643, grad_fn=<DivBackward0>)
INFO:root:Step 22


BETA 0.40132


INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  8,  8,  8,  7,  7,  6,  6,  6,  6,  5,  5,  5,
         5,  5,  4,  4,  4,  3,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(10.7370, grad_fn=<DivBackward0>)
INFO:root:Step 23


BETA 0.40138


INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  8,  8,  7,  7,  7,  7,  7,  6,  6,  5,  5,  5,
         4,  4,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(1.6038, grad_fn=<DivBackward0>)
INFO:root:Step 24


BETA 0.40144


INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  9,  8,  8,  7,  6,  6,  5,  5,  5,  5,  5,  4,
         4,  4,  4,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(11.5348, grad_fn=<DivBackward0>)
INFO:root:Step 25


BETA 0.4015


INFO:root:Batch Length: tensor([10,  9,  9,  9,  9,  8,  8,  8,  8,  7,  7,  7,  6,  6,  6,  5,  5,  5,
         5,  5,  4,  4,  4,  4,  4,  4,  3,  3,  3,  2,  2,  2],
       dtype=torch.int16)
INFO:root:tensor(2.7157, grad_fn=<DivBackward0>)
INFO:root:Step 26


BETA 0.40156000000000003


INFO:root:Batch Length: tensor([10, 10, 10,  8,  8,  8,  7,  6,  6,  6,  6,  5,  5,  5,  5,  4,  3,  3,
         3,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(15.5908, grad_fn=<DivBackward0>)
INFO:root:Step 27


BETA 0.40162000000000003


INFO:root:Batch Length: tensor([10,  9,  7,  6,  6,  6,  6,  6,  6,  5,  4,  4,  4,  4,  4,  4,  4,  4,
         3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(2.4449, grad_fn=<DivBackward0>)
INFO:root:Step 28


BETA 0.40168000000000004


INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  8,  7,  7,  7,  7,  6,  6,  5,  5,  5,  5,  5,
         5,  5,  5,  4,  4,  4,  4,  3,  3,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(22.8272, grad_fn=<DivBackward0>)
INFO:root:Step 29


BETA 0.40174000000000004


INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  8,  8,  8,  7,  7,  6,  6,  5,  5,  5,  5,  5,
         5,  5,  4,  4,  4,  4,  4,  4,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(6.5571, grad_fn=<DivBackward0>)
INFO:root:Step 30


BETA 0.40180000000000005


INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  7,  7,  7,  6,  6,  6,  5,  5,  5,  5,  5,  5,
         4,  4,  4,  4,  3,  3,  3,  3,  3,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(3.5597, grad_fn=<DivBackward0>)
INFO:root:Step 31


BETA 0.40186


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  8,  8,  6,  6,  5,  5,  5,  5,  5,  4,  4,  4,
         4,  4,  4,  3,  3,  3,  3,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)


'bout </s> (0.27301207184791565)


INFO:root:Using / Replacing Index 300


'bout </s> (0.11390777677297592)


INFO:root:Using / Replacing Index 301


'bout </s> (0.3593041002750397)


INFO:root:Using / Replacing Index 302


Everything Everything </s> (0.3593041002750397)


INFO:root:Using / Replacing Index 303


Everything Everything </s> (0.12605968117713928)


INFO:root:Using / Replacing Index 304


Everything </s> (0.16036590933799744)


INFO:root:Using / Replacing Index 305


'bout </s> (0.5372849702835083)


INFO:root:Using / Replacing Index 306
INFO:root:Batch Length: tensor([10, 10,  8,  7,  7,  7,  6,  6,  6,  5,  5,  5,  4,  4,  4,  4,  4,  4,
         3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(7.1903, grad_fn=<DivBackward0>)
INFO:root:Step 32


BETA 0.40192


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([9, 8, 8, 8, 8, 7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3,
        3, 2, 2, 2, 2, 1, 1, 1], dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  9,  9,  9,  9,  8,  8,  8,  7,  6,  6,  6,  6,  6,  5,  4,  4,
         4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(30.8800, grad_fn=<DivBackward0>)
INFO:root:Step 33


BETA 0.40198


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  8,  7,  7,  7,  6,  6,  5,  5,  5,  4,  4,  4,
         4,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  8,  8,  8,  8,  8,  7,  7,  6,  6,  5,  5,  4,  4,
         4,  3,  3,  3,  3,  3,  3,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(1.7257, grad_fn=<DivBackward0>)
INFO:root:Step 34


BETA 0.40204


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  8,  7,  7,  6,  6,  5,  5,  4,  4,  4,  3,  3,
         3,  3,  3,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  9,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,  4,
         4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(11.7933, grad_fn=<DivBackward0>)
INFO:root:Step 35


BETA 0.4021


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  9,  9,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,  5,  4,  4,
         4,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  9,  8,  8,  8,  7,  7,  7,  6,  6,  6,  5,  5,
         4,  4,  4,  4,  3,  3,  3,  3,  3,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(5.3554, grad_fn=<DivBackward0>)
INFO:root:Step 36


BETA 0.40216


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  8,  8,  8,  7,  7,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,
         5,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  2,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  9,  9,  9,  8,  8,  7,  6,  6,  6,  5,  5,  5,  5,  4,  4,  4,
         4,  4,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(2.9078, grad_fn=<DivBackward0>)
INFO:root:Step 37


BETA 0.40222


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([9, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2,
        2, 2, 2, 2, 2, 2, 1, 1], dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  8,  8,  7,  7,  7,  6,  6,  5,  5,  4,  4,  4,
         3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(9.6597, grad_fn=<DivBackward0>)
INFO:root:Step 38


BETA 0.40228


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  9,  9,  9,  9,  8,  8,  8,  8,  7,  7,  6,  5,  5,  5,  5,  5,  4,
         4,  4,  4,  4,  4,  4,  3,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  8,  8,  7,  7,  7,  7,  6,  5,  5,  4,  3,  3,  3,  3,  3,  3,
         3,  3,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(3.9018, grad_fn=<DivBackward0>)
INFO:root:Step 39


BETA 0.40234000000000003


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  8,  8,  7,  7,  6,  5,  5,  5,  5,  5,  5,  4,  4,  4,  4,
         4,  4,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  7,  7,  7,  7,  6,  6,  5,  5,  5,  4,  4,  4,
         4,  4,  3,  3,  3,  3,  2,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(15.3151, grad_fn=<DivBackward0>)
INFO:root:Step 40


BETA 0.40240000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  9,  9,  8,  8,  7,  7,  6,  6,  5,  5,  5,  5,  5,  5,  4,  4,  4,
         3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  8,  8,  7,  7,  7,  6,  6,  5,  5,  5,  5,  4,
         3,  3,  3,  3,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(1.1816, grad_fn=<DivBackward0>)
INFO:root:Target Model Updated
INFO:root:Step 41


BETA 0.40246000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  8,  8,  8,  7,  7,  6,  6,  6,  6,  5,  5,  4,
         4,  4,  3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  9,  9,  8,  8,  8,  8,  7,  6,  6,  5,  4,  4,  4,  4,  4,  3,
         3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(3.7441, grad_fn=<DivBackward0>)
INFO:root:Step 42


BETA 0.40252000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  9,  9,  8,  7,  7,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,  5,  4,
         4,  4,  3,  3,  3,  2,  2,  1,  1,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  9,  9,  9,  8,  8,  8,  7,  6,  5,  5,  5,  5,  4,  4,  4,  4,
         4,  4,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(3.9943, grad_fn=<DivBackward0>)
INFO:root:Step 43


BETA 0.40258000000000005


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([8, 8, 8, 8, 8, 7, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2,
        2, 2, 2, 2, 2, 1, 1, 1], dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  8,  8,  8,  8,  8,  7,  7,  6,  6,  5,  5,  5,  5,  5,  4,  4,
         4,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(12.3768, grad_fn=<DivBackward0>)
INFO:root:Step 44


BETA 0.40264


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  7,  7,  6,  5,  5,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,
         2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  7,  6,  6,  6,  6,  5,  5,  5,  5,  4,  4,  4,
         4,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(14.1118, grad_fn=<DivBackward0>)
INFO:root:Step 45


BETA 0.4027


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  7,  7,  7,  7,  6,  6,  6,  5,  5,  5,  5,  4,  4,
         4,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  9,  9,  8,  8,  8,  8,  8,  7,  7,  7,  6,  6,  5,  5,  5,  5,
         5,  4,  4,  4,  4,  4,  3,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(72.2351, grad_fn=<DivBackward0>)
INFO:root:Step 46


BETA 0.40276


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  7,  6,  6,  6,  5,  4,  4,  4,  4,  4,  3,  3,  3,
         3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  8,  8,  8,  8,  6,  6,  6,  6,  5,  5,  5,  5,
         5,  5,  5,  5,  4,  3,  3,  3,  3,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(8.5127, grad_fn=<DivBackward0>)
INFO:root:Step 47


BETA 0.40282


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  7,  7,  6,  6,  6,  5,  5,  5,  5,  5,  5,  5,
         5,  4,  3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  6,  5,  5,  5,  5,  5,  5,  5,  4,  4,  4,  4,
         4,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(0.9559, grad_fn=<DivBackward0>)
INFO:root:Step 48


BETA 0.40288


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10, 10,  9,  8,  8,  7,  6,  6,  6,  5,  5,  5,  5,  5,
         4,  4,  4,  4,  3,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  9,  9,  9,  9,  8,  8,  6,  6,  6,  5,  5,  5,  5,  5,  5,  5,
         4,  4,  4,  4,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(12.7540, grad_fn=<DivBackward0>)
INFO:root:Step 49


BETA 0.40294


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  8,  8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,  5,
         5,  5,  4,  4,  4,  3,  3,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  9,  8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,
         6,  6,  5,  5,  4,  4,  4,  3,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(31.1160, grad_fn=<DivBackward0>)
INFO:root:Step 50


BETA 0.403


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  9,  8,  8,  7,  7,  6,  6,  5,  5,  5,  5,  5,  5,  5,  4,  4,  4,
         4,  4,  4,  3,  3,  3,  3,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  8,  8,  7,  7,  7,  7,  6,  6,  5,  5,  5,  5,  5,  4,  4,  4,
         4,  4,  4,  3,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(9.2426, grad_fn=<DivBackward0>)
INFO:root:Step 51


BETA 0.40306000000000003


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([9, 9, 8, 8, 8, 8, 8, 8, 7, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4, 3, 3, 3, 3, 3,
        3, 2, 2, 2, 2, 2, 2, 1], dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  9,  9,  9,  8,  8,  8,  8,  6,  6,  6,  5,  5,  5,  4,  4,  4,
         4,  4,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(2.1985, grad_fn=<DivBackward0>)
INFO:root:Step 52


BETA 0.40312000000000003


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  8,  8,  7,  7,  7,  7,  6,  6,  5,  5,  5,  5,  5,  5,  5,
         4,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  9,  8,  7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  4,
         4,  4,  4,  4,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(61.2424, grad_fn=<DivBackward0>)
INFO:root:Step 53


BETA 0.40318000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  9,  8,  8,  8,  8,  7,  6,  6,  6,  6,  5,  5,  4,  4,  4,  4,  4,
         4,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  8,  7,  7,  6,  6,  6,  5,  5,  5,  5,  4,  4,
         4,  4,  4,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2],
       dtype=torch.int16)
INFO:root:tensor(2.2294, grad_fn=<DivBackward0>)
INFO:root:Step 54


BETA 0.40324000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  9,  9,  9,  9,  9,  8,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,
         4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  2,  2,  2,  2],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  8,  8,  8,  8,  8,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,
         4,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(53.4515, grad_fn=<DivBackward0>)
INFO:root:Step 55


BETA 0.40330000000000005


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  7,  7,  7,  7,  7,  5,  5,  4,  4,  4,  3,  3,  3,
         3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  9,  9,  8,  7,  6,  5,  5,  5,  5,  5,  5,  5,  5,
         5,  5,  4,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(2.0651, grad_fn=<DivBackward0>)
INFO:root:Step 56


BETA 0.40336


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  9,  9,  8,  8,  8,  7,  7,  6,  5,  5,  5,  5,  5,  5,  5,  5,  5,
         5,  5,  5,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  8,  8,  7,  7,  7,  6,  6,  5,  5,  5,  4,  4,
         4,  4,  3,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(7.2997, grad_fn=<DivBackward0>)
INFO:root:Step 57


BETA 0.40342


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  8,  8,  8,  8,  7,  7,  6,  5,  5,  4,  4,  4,
         4,  3,  3,  3,  3,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  8,  8,  8,  8,  6,  6,  5,  5,  5,  5,  5,  5,
         4,  4,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(9.1631, grad_fn=<DivBackward0>)
INFO:root:Step 58


BETA 0.40348


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  9,  8,  8,  8,  7,  7,  6,  6,  6,  6,  5,  5,
         5,  5,  5,  5,  4,  4,  4,  4,  3,  3,  3,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  8,  8,  7,  7,  6,  6,  6,  6,  5,  5,  5,  5,
         5,  5,  4,  4,  4,  4,  4,  4,  3,  3,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(11.7391, grad_fn=<DivBackward0>)
INFO:root:Step 59


BETA 0.40354


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,  5,  5,
         5,  5,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  2,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  7,  7,  7,  6,  6,  5,  5,  4,  4,  4,  4,  4,
         3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(6.1562, grad_fn=<DivBackward0>)
INFO:root:Step 60


BETA 0.4036


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  9,  9,  9,  9,  9,  9,  8,  7,  7,  7,  5,  5,  5,  4,  4,  4,  4,
         4,  3,  3,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  9,  9,  9,  8,  8,  7,  7,  6,  6,  6,  5,  5,  5,  5,  5,  5,
         5,  5,  4,  4,  3,  3,  3,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(28.2323, grad_fn=<DivBackward0>)
INFO:root:Target Model Updated
INFO:root:Step 61


BETA 0.40366


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  9,  8,  8,  7,  7,  6,  6,  6,  6,  6,  5,  5,
         5,  5,  5,  5,  4,  4,  4,  4,  4,  3,  3,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  9,  8,  8,  8,  7,  7,  7,  6,  5,  5,  5,  4,
         4,  3,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(5.8638, grad_fn=<DivBackward0>)
INFO:root:Step 62


BETA 0.40372


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  8,  8,  8,  8,  8,  7,  6,  5,  5,  5,  5,  4,  4,  4,  4,
         4,  3,  3,  3,  3,  3,  3,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  7,  7,  7,  7,  7,  7,  6,  6,  5,  5,  5,  5,  5,  4,  4,
         4,  4,  4,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(2.6435, grad_fn=<DivBackward0>)
INFO:root:Step 63


BETA 0.40378000000000003


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  8,  8,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  4,  4,
         4,  4,  4,  4,  4,  4,  3,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.10764345526695251)


INFO:root:Using / Replacing Index 307


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.05381409451365471)


INFO:root:Using / Replacing Index 308


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.00430516479536891)


INFO:root:Using / Replacing Index 309


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.0673239529132843)


INFO:root:Using / Replacing Index 310


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.08126306533813477)


INFO:root:Using / Replacing Index 311


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.06833381950855255)


INFO:root:Using / Replacing Index 312


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.08179134130477905)


INFO:root:Using / Replacing Index 313


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.02674984373152256)


INFO:root:Using / Replacing Index 314


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.09093264490365982)


INFO:root:Using / Replacing Index 315


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.10113117098808289)


INFO:root:Using / Replacing Index 316


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.06399610638618469)


INFO:root:Using / Replacing Index 317


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.04088987037539482)


INFO:root:Using / Replacing Index 318


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.0673239529132843)


INFO:root:Using / Replacing Index 319


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.022493846714496613)


INFO:root:Using / Replacing Index 320


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.06399610638618469)


INFO:root:Using / Replacing Index 321


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.00430516479536891)


INFO:root:Using / Replacing Index 322


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.04862652346491814)


INFO:root:Using / Replacing Index 323


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.04862652346491814)


INFO:root:Using / Replacing Index 324


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.04088987037539482)


INFO:root:Using / Replacing Index 325


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.0673239529132843)


INFO:root:Using / Replacing Index 326


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.0673239529132843)


INFO:root:Using / Replacing Index 327


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.09093264490365982)


INFO:root:Using / Replacing Index 328


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.11967409402132034)


INFO:root:Using / Replacing Index 329


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.11967409402132034)


INFO:root:Using / Replacing Index 330


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.035204771906137466)


INFO:root:Using / Replacing Index 331


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.022493846714496613)


INFO:root:Using / Replacing Index 332


<unk> now! now! </s> (0.2795279324054718)


INFO:root:Using / Replacing Index 333


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.005119732581079006)


INFO:root:Using / Replacing Index 334


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.09093264490365982)


INFO:root:Using / Replacing Index 335


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.005119732581079006)


INFO:root:Using / Replacing Index 336


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.022493846714496613)


INFO:root:Using / Replacing Index 337


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.10764345526695251)


INFO:root:Using / Replacing Index 338
INFO:root:Batch Length: tensor([10, 10, 10,  8,  8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,
         5,  5,  5,  5,  5,  4,  4,  3,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(16.5703, grad_fn=<DivBackward0>)
INFO:root:Step 64


BETA 0.40384000000000003


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  9,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,  5,  5,
         5,  4,  3,  3,  3,  3,  3,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.08993236720561981)


INFO:root:Using / Replacing Index 339


<unk> </s> (0.1857505738735199)


INFO:root:Using / Replacing Index 340


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.022493846714496613)


INFO:root:Using / Replacing Index 341


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.04088987037539482)


INFO:root:Using / Replacing Index 342


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.022493846714496613)


INFO:root:Using / Replacing Index 343


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.10764345526695251)


INFO:root:Using / Replacing Index 344


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.10063351690769196)


INFO:root:Using / Replacing Index 345


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.022493846714496613)


INFO:root:Using / Replacing Index 346


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.1358305960893631)


INFO:root:Using / Replacing Index 347
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  8,  7,  7,  7,  7,  7,  7,  7,  6,  6,  6,  6,  6,
         5,  5,  5,  4,  4,  4,  3,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(2.6881, grad_fn=<DivBackward0>)
INFO:root:Step 65


BETA 0.40390000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  9,  9,  8,  7,  7,  7,  6,  6,  5,  5,  5,  5,  5,  4,  4,
         4,  4,  3,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  9,  8,  8,  6,  6,  6,  6,  6,  5,  5,  5,  5,  4,  4,  4,  4,
         3,  3,  3,  3,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(16.2032, grad_fn=<DivBackward0>)
INFO:root:Step 66


BETA 0.40396000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([9, 9, 8, 8, 8, 8, 8, 7, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2,
        2, 2, 2, 2, 1, 1, 1, 1], dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  9,  9,  9,  8,  8,  8,  8,  7,  7,  6,  6,  6,  5,  5,  5,  5,
         5,  4,  4,  4,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(33.2878, grad_fn=<DivBackward0>)
INFO:root:Step 67


BETA 0.40402000000000005


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  8,  7,  7,  6,  6,  5,  5,  5,  5,  5,  4,  4,  4,  4,  3,  3,  3,
         3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  8,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,  5,
         4,  4,  4,  4,  4,  4,  3,  3,  3,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(2.9056, grad_fn=<DivBackward0>)
INFO:root:Step 68


BETA 0.40408


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  8,  8,  8,  8,  8,  7,  7,  7,  6,  5,  5,  5,  4,
         4,  4,  4,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)


<unk> </s> (0.472870796918869)


INFO:root:Using / Replacing Index 348


<unk> </s> (1.0)


INFO:root:Using / Replacing Index 349


<unk> </s> (0.472870796918869)


INFO:root:Using / Replacing Index 350


<unk> </s> (0.20556680858135223)


INFO:root:Using / Replacing Index 351


<unk> </s> (0.24446150660514832)


INFO:root:Using / Replacing Index 352


<unk> </s> (0.7071067690849304)


INFO:root:Using / Replacing Index 353


<unk> </s> (0.472870796918869)


INFO:root:Using / Replacing Index 354


<unk> </s> (0.24446150660514832)


INFO:root:Using / Replacing Index 355


<unk> </s> (0.7071067690849304)


INFO:root:Using / Replacing Index 356


<unk> </s> (1.0)


INFO:root:Using / Replacing Index 357


<unk> </s> (0.20556680858135223)


INFO:root:Using / Replacing Index 358
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  8,  8,  8,  7,  7,  7,  5,  5,  4,  4,  4,  4,  4,
         4,  4,  3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(9.6575, grad_fn=<DivBackward0>)
INFO:root:Step 69


BETA 0.40414


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  7,  7,  5,  5,  5,  4,  4,  4,  4,  4,  3,  3,
         3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([9, 9, 8, 7, 6, 6, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1], dtype=torch.int16)
INFO:root:tensor(2.0188, grad_fn=<DivBackward0>)
INFO:root:Step 70


BETA 0.4042


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  9,  9,  8,  8,  7,  6,  5,  5,  5,  5,  5,  5,  5,  4,  4,
         3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)


mention </s> (0.16036590933799744)


INFO:root:Using / Replacing Index 359


Let </s> (0.1411399245262146)


INFO:root:Using / Replacing Index 360
INFO:root:Batch Length: tensor([10,  9,  9,  9,  8,  8,  8,  7,  7,  7,  7,  6,  6,  5,  5,  5,  5,  4,
         4,  4,  4,  4,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(24.8095, grad_fn=<DivBackward0>)
INFO:root:Step 71


BETA 0.40426


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  9,  9,  8,  8,  8,  8,  6,  5,  5,  5,  5,  5,  5,  4,  4,
         4,  4,  4,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)


<unk> </s> (0.220895916223526)


INFO:root:Using / Replacing Index 361


<unk> <unk> </s> (0.472870796918869)


INFO:root:Using / Replacing Index 362


<unk> </s> (0.1857505738735199)


INFO:root:Using / Replacing Index 363


<unk> </s> (0.1857505738735199)


INFO:root:Using / Replacing Index 364


<unk> </s> (0.1857505738735199)


INFO:root:Using / Replacing Index 365


<unk> </s> (0.29071536660194397)


INFO:root:Using / Replacing Index 366


<unk> </s> (0.29071536660194397)


INFO:root:Using / Replacing Index 367


<unk> </s> (0.27301207184791565)


INFO:root:Using / Replacing Index 368


<unk> </s> (0.21105340123176575)


INFO:root:Using / Replacing Index 369


<unk> </s> (0.21105340123176575)


INFO:root:Using / Replacing Index 370


<unk> </s> (0.15619699656963348)


INFO:root:Using / Replacing Index 371


<unk> </s> (0.16590386629104614)


INFO:root:Using / Replacing Index 372


<unk> </s> (0.27301207184791565)


INFO:root:Using / Replacing Index 373


<unk> </s> (0.3593041002750397)


INFO:root:Using / Replacing Index 374


<unk> </s> (0.24446150660514832)


INFO:root:Using / Replacing Index 375


<unk> <unk> </s> (0.27301207184791565)


INFO:root:Using / Replacing Index 376


<unk> </s> (0.7071067690849304)


INFO:root:Using / Replacing Index 377


<unk> </s> (0.24446150660514832)


INFO:root:Using / Replacing Index 378


<unk> </s> (0.7598356604576111)


INFO:root:Using / Replacing Index 379


<unk> </s> (0.472870796918869)


INFO:root:Using / Replacing Index 380


<unk> </s> (0.12605968117713928)


INFO:root:Using / Replacing Index 381


<unk> </s> (0.20556680858135223)


INFO:root:Using / Replacing Index 382


<unk> </s> (1.0)


INFO:root:Using / Replacing Index 383


<unk> </s> (0.1395079642534256)


INFO:root:Using / Replacing Index 384


<unk> </s> (0.5372849702835083)


INFO:root:Using / Replacing Index 385


<unk> <unk> </s> (0.2626909911632538)


INFO:root:Using / Replacing Index 386


<unk> </s> (0.3593041002750397)


INFO:root:Using / Replacing Index 387


<unk> </s> (0.20556680858135223)


INFO:root:Using / Replacing Index 388


<unk> </s> (0.5372849702835083)


INFO:root:Using / Replacing Index 389


<unk> </s> (0.472870796918869)


INFO:root:Using / Replacing Index 390


<unk> </s> (0.27301207184791565)


INFO:root:Using / Replacing Index 391


<unk> </s> (1.0)


INFO:root:Using / Replacing Index 392
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  8,  7,  6,  6,  6,  6,  6,  6,  5,  5,  4,  4,
         4,  4,  4,  4,  3,  3,  3,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(70.4916, grad_fn=<DivBackward0>)
INFO:root:Step 72


BETA 0.40432


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10,  8,  8,  8,  8,  8,  8,  7,  7,  6,  6,  5,  5,  5,  5,  5,  5,  5,
         5,  4,  4,  4,  4,  3,  3,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.04088987037539482)


INFO:root:Using / Replacing Index 393


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.0673239529132843)


INFO:root:Using / Replacing Index 394


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.022493846714496613)


INFO:root:Using / Replacing Index 395


<unk> </s> (0.24446150660514832)


INFO:root:Using / Replacing Index 396


<unk> </s> (0.24446150660514832)


INFO:root:Using / Replacing Index 397


<unk> </s> (0.7071067690849304)


INFO:root:Using / Replacing Index 398


<unk> </s> (0.24446150660514832)


INFO:root:Using / Replacing Index 399


<unk> </s> (0.27301207184791565)


INFO:root:Using / Replacing Index 400


<unk> </s> (0.17747405171394348)


INFO:root:Using / Replacing Index 401


<unk> </s> (0.302137553691864)


INFO:root:Using / Replacing Index 402


<unk> </s> (0.472870796918869)


INFO:root:Using / Replacing Index 403


<unk> </s> (0.15619699656963348)


INFO:root:Using / Replacing Index 404


<unk> </s> (0.1395079642534256)


INFO:root:Using / Replacing Index 405
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  8,  7,  7,  6,  6,  6,  6,  6,  6,  5,  4,  4,
         4,  4,  4,  4,  4,  4,  2,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(7.0083, grad_fn=<DivBackward0>)
INFO:root:Step 73


BETA 0.40438


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  6,  5,  5,  4,
         4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  1],
       dtype=torch.int16)


the the the the the the the the the the the the (0.04088987037539482)


INFO:root:Using / Replacing Index 406


<unk> </s> (0.21105340123176575)


INFO:root:Using / Replacing Index 407


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.04088987037539482)


INFO:root:Using / Replacing Index 408


<unk> </s> (0.16590386629104614)


INFO:root:Using / Replacing Index 409


<unk> </s> (1.0)


INFO:root:Using / Replacing Index 410


the the the happy </s> (0.12605968117713928)


INFO:root:Using / Replacing Index 411


<unk> </s> (0.1499110758304596)


INFO:root:Using / Replacing Index 412


<unk> </s> (0.472870796918869)


INFO:root:Using / Replacing Index 413


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.0673239529132843)


INFO:root:Using / Replacing Index 414


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.09726683795452118)


INFO:root:Using / Replacing Index 415


<unk> <unk> <unk> </s> (0.6687403321266174)


INFO:root:Using / Replacing Index 416


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.04862652346491814)


INFO:root:Using / Replacing Index 417


<unk> <unk> </s> (0.11390777677297592)


INFO:root:Using / Replacing Index 418


<unk> </s> (0.1857505738735199)


INFO:root:Using / Replacing Index 419


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.0673239529132843)


INFO:root:Using / Replacing Index 420


<unk> </s> (0.1499110758304596)


INFO:root:Using / Replacing Index 421


<unk> </s> (0.16590386629104614)


INFO:root:Using / Replacing Index 422


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.13755609095096588)


INFO:root:Using / Replacing Index 423


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.09050416201353073)


INFO:root:Using / Replacing Index 424
INFO:root:Batch Length: tensor([10, 10,  9,  9,  9,  9,  9,  8,  6,  5,  5,  5,  5,  5,  4,  4,  4,  4,
         4,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(4.8735, grad_fn=<DivBackward0>)
INFO:root:Step 74


BETA 0.40444


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  9,  8,  8,  8,  7,  6,  6,  5,  5,  5,  5,  5,
         5,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1],
       dtype=torch.int16)


now now now we'll </s> (0.1411399245262146)


INFO:root:Using / Replacing Index 425


now now now now </s> (0.11390777677297592)


INFO:root:Using / Replacing Index 426


you </s> (0.20556680858135223)


INFO:root:Using / Replacing Index 427


you </s> (0.220895916223526)


INFO:root:Using / Replacing Index 428


now </s> (0.27301207184791565)


INFO:root:Using / Replacing Index 429


you </s> (0.16036590933799744)


INFO:root:Using / Replacing Index 430


What </s> (0.1857505738735199)


INFO:root:Using / Replacing Index 431
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  8,  8,  8,  6,  5,  5,  5,  4,  4,  4,  4,  4,  4,
         4,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(27.9093, grad_fn=<DivBackward0>)
INFO:root:Step 75


BETA 0.4045


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  9,  9,  8,  8,  7,  7,  6,  6,  5,  4,  4,  4,
         3,  3,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)


<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> (0.005119732581079006)


INFO:root:Using / Replacing Index 432
INFO:root:Batch Length: tensor([10, 10,  9,  9,  8,  8,  8,  7,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,
         5,  5,  4,  4,  4,  4,  3,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(1.4254, grad_fn=<DivBackward0>)
INFO:root:Step 76


BETA 0.40456000000000003


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  8,  8,  8,  7,  7,  6,  6,  5,  5,  5,  5,  4,
         4,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  9,  8,  8,  7,  7,  7,  7,  7,  6,  6,  5,  5,
         5,  5,  4,  4,  3,  3,  2,  2,  1,  1,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(17.4498, grad_fn=<DivBackward0>)
INFO:root:Step 77


BETA 0.40462000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  9,  7,  7,  7,  7,  7,  7,  6,  6,  5,  5,  5,  5,  5,  5,
         5,  4,  4,  4,  4,  4,  4,  3,  3,  3,  2,  2,  2,  2],
       dtype=torch.int16)


<unk> </s> (0.27301207184791565)


INFO:root:Using / Replacing Index 433


<unk> </s> (0.302137553691864)


INFO:root:Using / Replacing Index 434


<unk> </s> (0.5372849702835083)


INFO:root:Using / Replacing Index 435


<unk> </s> (0.5372849702835083)


INFO:root:Using / Replacing Index 436


<unk> </s> (0.302137553691864)


INFO:root:Using / Replacing Index 437


<unk> </s> (0.27301207184791565)


INFO:root:Using / Replacing Index 438


<unk> </s> (1.0)


INFO:root:Using / Replacing Index 439


<unk> </s> (0.7071067690849304)


INFO:root:Using / Replacing Index 440
INFO:root:Batch Length: tensor([10, 10, 10,  8,  8,  8,  8,  7,  7,  7,  5,  5,  5,  5,  5,  5,  4,  4,
         4,  4,  4,  3,  3,  3,  3,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(5.2716, grad_fn=<DivBackward0>)
INFO:root:Step 78


BETA 0.40468000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  7,  7,  7,  7,  6,  6,  6,  5,  5,  5,  5,  4,  4,
         4,  4,  4,  4,  3,  3,  2,  2,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,  7,  7,  7,  5,  5,  5,  5,
         5,  5,  4,  4,  4,  3,  3,  3,  3,  3,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(41.7126, grad_fn=<DivBackward0>)
INFO:root:Step 79


BETA 0.40474000000000004


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10,  9,  8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  5,  5,  5,  5,
         5,  5,  4,  4,  4,  3,  3,  3,  2,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10,  9,  9,  9,  8,  8,  8,  7,  7,  6,  6,  5,  5,  5,  5,  5,  5,  5,
         5,  4,  4,  4,  4,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(1.3751, grad_fn=<DivBackward0>)
INFO:root:Step 80


BETA 0.40480000000000005


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10,  9,  9,  9,  8,  8,  8,  8,  8,  7,  6,  6,  5,  5,  5,
         5,  4,  4,  4,  4,  4,  4,  3,  3,  3,  2,  2,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10, 10, 10,  9,  7,  7,  7,  7,  6,  6,  6,  5,  5,  5,
         5,  5,  4,  4,  4,  4,  4,  4,  3,  3,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(29.8150, grad_fn=<DivBackward0>)
INFO:root:Target Model Updated
INFO:root:Step 81


BETA 0.40486


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10,  9,  8,  8,  8,  7,  7,  6,  6,  5,  5,  5,  5,  5,  4,  4,
         4,  4,  4,  3,  3,  3,  3,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)


<unk> </s> (0.12605968117713928)


INFO:root:Using / Replacing Index 441
INFO:root:Batch Length: tensor([10,  9,  9,  8,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  4,  4,  4,  3,
         3,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(0.8210, grad_fn=<DivBackward0>)
INFO:root:Step 82


BETA 0.40492


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 3, 3, 3,
        3, 2, 2, 2, 2, 2, 2, 1], dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10,  8,  8,  7,  7,  7,  7,  6,  5,  5,  5,  5,  5,  5,  5,  4,  4,
         4,  4,  4,  4,  4,  3,  3,  3,  2,  2,  1,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(0.8801, grad_fn=<DivBackward0>)
INFO:root:Step 83


BETA 0.40498


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10, 10,  9,  8,  8,  8,  8,  7,  7,  5,  5,  5,  5,  4,
         4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10, 10,  9,  9,  8,  8,  7,  6,  6,  5,  5,  5,  5,  5,
         4,  4,  4,  4,  4,  3,  3,  3,  3,  2,  2,  2,  2,  1],
       dtype=torch.int16)
INFO:root:tensor(2.5516, grad_fn=<DivBackward0>)
INFO:root:Step 84


BETA 0.40504


INFO:root:Sampling: Collecting new data
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  9,  9,  9,  8,  8,  8,  8,  8,  8,  7,  6,  5,
         5,  5,  4,  3,  3,  3,  3,  3,  2,  2,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:Batch Length: tensor([10, 10, 10, 10, 10,  9,  9,  8,  8,  7,  7,  6,  6,  6,  6,  5,  5,  5,
         5,  4,  4,  4,  4,  3,  3,  3,  3,  3,  2,  1,  1,  1],
       dtype=torch.int16)
INFO:root:tensor(2.5095, grad_fn=<DivBackward0>)


KeyboardInterrupt: 