# HW1: seq2seq nmt

**Homework Goals**

1. Get familiar with text data preparation
2. Learn to work with RNN
3. Train the model to translate `en-->ru`.



In [2]:
import os
import re
import gc
import spacy
import random
import logging
import numpy as np
import unicodedata
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from itertools import islice
from functools import partial
from collections import defaultdict, Counter
from typing import Union, Tuple, List, Dict
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

import warnings
warnings.filterwarnings("ignore")

## Naive way of texts representation:

0. Normalize spelling
1. Filter out all special characters
2. Split by spaces, do *naive tokenization*

In [3]:
class SeqPreproc:
    def __init__(self) -> None:
        self.raw_alphabet = set()
        self.alphabet = set()
    
    @staticmethod
    def normalize(seq: str):
        return "".join(char for char in unicodedata.normalize('NFD', seq) if unicodedata.category(char) != 'Mn')

    def preprocess(self, seq: str, add_sym: bool = True) -> str:
        # adding raw symbols
        if add_sym:
            self.raw_alphabet.update(seq)
        
        # clean up sequence
        seq = self.normalize(seq.lower().strip())
        seq = re.sub(r"[^a-zа-я?.,!]+", " ", seq)
        seq = re.sub(r"([.!?])", r" \1", seq)
        
        if add_sym:
            self.alphabet.update(seq)
        return seq


with open("eng-rus.txt", mode="r", encoding="utf-8") as f:
    sp = SeqPreproc()
    pairs = [tuple(map(sp.preprocess, line.split("\t"))) for line in f.readlines()]

print(f"Alphabet before preprocessing (size - {len(sp.raw_alphabet)}):")
print(*sorted(sp.raw_alphabet), "\n")
print(f"Alphabet after preprocessing (size - {len(sp.alphabet)}):")
print(*sorted(sp.alphabet), "\n")
print("Pairs (few examples):")
print(*pairs[:10])
print(f"Total pairs qantity: {len(pairs)}")

Alphabet before preprocessing (size - 174):

   ! " $ % & ' ( ) + , - . / 0 1 2 3 4 5 6 7 8 9 : ; ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z   « ° º » ã ç é ê î ï ó ö ú ü ́ Ё А Б В Г Д Е Ж З И Й К Л М Н О П Р С Т У Ф Х Ц Ч Ш Щ Ь Э Ю Я а б в г д е ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я ё ׁ ​ – — ― ‘ ’ …   ‽ ₂ € № 

Alphabet after preprocessing (size - 62):
  ! , . ? a b c d e f g h i j k l m n o p q r s t u v w x y z а б в г д е ж з и к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я 

Pairs (few examples):
('go .', 'иди .') ('go .', 'идите .') ('hi .', 'здравствуите .') ('hi .', 'привет !') ('hi .', 'хаи .') ('hi .', 'здрасте .') ('hi .', 'здорово !') ('run !', 'беги !') ('run !', 'бегите !') ('run .', 'беги !')
Total pairs qantity: 336666


Each word will be assigned a number + we will need special tokens for the beginning and end of the sequence and for unknown words.
`<SOS>, <EOS>, <UNK>`

We have two languages, to work with each we need functions for translating from words to numbers and vice versa.

It is proposed to implement these functions as dictionaries. Allocate the first 4 numbers for special tokens

**(1 point)** Implement the dictionary building function, the function takes a list of strings (normalized sentences, can be splited by spaces) as input. Organize the dictionary in a reasonable way so that rare words can be thrown out if necessary.

In [4]:
COMMON_TOKENS = ['PAD', 'SOS', 'EOS', 'UNK']


def build_vocabs(sents: tuple, max_size: int = 1000, special_tokens: list = None) -> Tuple[dict]:
    vocab = dict()
    
    for seq in sents:
        for token in seq.split():
            vocab[token] = vocab.get(token, 0) + 1
            
    vocab = {token: qty for token, qty in sorted(vocab.items(), key=lambda tup: tup[1], reverse=True)}
    tokens = special_tokens.copy() if special_tokens else list()
    tokens.extend(list(islice(vocab.keys(), max_size - len(special_tokens))) )
    tok2idx, idx2tok = \
        dict(zip(tokens, range(len(tokens)))), dict(zip(range(len(tokens)), tokens))
    return tok2idx, idx2tok


eng, rus = list(zip(*pairs))
rus2idx, idx2rus = build_vocabs(rus, max_size=10000, special_tokens=COMMON_TOKENS)
eng2idx, idx2eng = build_vocabs(eng, max_size=5000 , special_tokens=COMMON_TOKENS)

print(len(rus2idx), len(idx2rus))
print(len(eng2idx), len(idx2eng))

10000 10000
5000 5000


In [5]:
def sentence2idx(seq: str, tok2idx: dict) -> list():
    """
    Takes sentence as input and returns sequence of tokens indexes
    """
    tokens = SeqPreproc().preprocess(seq=seq).split()
    unk = tok2idx.get("UNK")
    return [tok2idx.get("SOS")] + [tok2idx.get(token, unk) for token in tokens] + [tok2idx.get("EOS")]


def idx2sentence(seq: list, idx2tok: dict) -> str:
    """
    Takes sequence of tokens indexes as input and returns sentence
    """
    return " ".join(idx2tok.get(idx) for idx in seq)

# check the consistency of the transformations
x = sentence2idx('Привет мир!', rus2idx)
print(x)
print(idx2sentence(seq=x, idx2tok=idx2rus))

x = sentence2idx('Hello world!', eng2idx)
print(x)
print(idx2sentence(seq=x, idx2tok=idx2eng))

[1, 2539, 1264, 83, 2]
SOS привет мир ! EOS
[1, 1960, 439, 174, 2]
SOS hello world ! EOS


**Build vocabs using Spacy tokenizer (as example, didn't used in training process)**

In [61]:
class PreprocessSpacy:
    def __init__(self) -> None:
        self.en_tokens, self.ru_tokens = Counter(), Counter()
        
    def __call__(self, line: str, ru_nlp: spacy.lang, en_nlp: spacy.lang, 
                 update_vocabs: bool = True) -> None:
        en_seq, ru_seq = line.split("\t")
        en_seq, ru_seq = \
            self.preprocess(seq=en_seq, nlp=en_nlp), self.preprocess(seq=ru_seq, nlp=ru_nlp)
        
        if update_vocabs:
            self.en_tokens.update(en_seq)
            self.ru_tokens.update(ru_seq)
        return en_seq, ru_seq
    
    @staticmethod
    def normalize(seq: str):
        return "".join(char for char in unicodedata.normalize('NFD', seq) if unicodedata.category(char) != 'Mn')
    
    def preprocess(self, seq: str, nlp: spacy.lang) -> str:
        # text normalization and removing digits and symbols
        seq = self.normalize(seq.lower().strip())
        seq = re.sub(r"[^a-zа-я?.,!']+", " ", seq)
        seq = re.sub(r"([.!?])", r" \1", seq)
        
        # split into the tokens
        seq = [token.text for token in nlp(seq)]
        return seq
    
    def build_vocabs(self, max_size: Union[int, Tuple[int]] = None, 
                     common_tokens: List[str] = None) -> Tuple[dict]:
        if not isinstance(max_size, tuple):
            en_max_size, ru_max_size = max_size
        else:
            en_max_size, ru_max_size = max_size
            
        en_vocab = [token[0] for token in self.en_tokens.most_common(en_max_size)]
        ru_vocab = [token[0] for token in self.ru_tokens.most_common(ru_max_size)]
        
        if isinstance(common_tokens, list) and len(common_tokens) > 0:
            en_vocab = [*common_tokens, *en_vocab]
            ru_vocab = [*common_tokens, *ru_vocab]
            
        en_idxes = range(len(en_vocab))
        ru_idxes = range(len(ru_vocab))
        
        eng2idx, idx2eng, rus2idx, idx2rus = \
            dict(zip(en_vocab, en_idxes)), dict(zip(en_idxes, en_vocab)), \
            dict(zip(ru_vocab, ru_idxes)), dict(zip(ru_idxes, ru_vocab))
        return eng2idx, idx2eng, rus2idx, idx2rus
    

# also we can use spacy for tokenization (very precise approach)
# load SpaCy model for both languages
ru_nlp = spacy.load("ru_core_news_lg")
en_nlp = spacy.load("en_core_web_lg")

# iterrate over pairs of sequences and tokenize them (without lemmatization, cause we need suffixes also)
# but we will keep normalization and make all tokens lower case
# vocab will be larger, but it's OK. We need good quality at the end
with open("eng-rus.txt", mode="r", encoding="utf-8") as f:
    spcy_builder = PreprocessSpacy()
    pairs_spcy = [spcy_builder(line=line, ru_nlp=ru_nlp, en_nlp=en_nlp) for line in f.readlines()]
    
print("EN vocab size: {}".format(len(spcy_builder.en_tokens)))
print("RU vocab size: {}".format(len(spcy_builder.ru_tokens)))

# bild vocabs
eng2idx_spcy, idx2eng_spcy, rus2idx_spcy, idx2rus_spcy = \
    spcy_builder.build_vocabs(max_size=(5000, 10000), common_tokens=['PAD', 'SOS', 'EOS', 'UNK'])

EN vocab size: 15455
RU vocab size: 51050


## Dealing with arbitrary length sequences in pytorch

We need to be able to generate batches of `[bs, 1, seq_len]` tensors.
But in our dataset, the samples are of different lengths:

- we could cut everything down to the minimum length
- padd to maximum length
- choose some average length

**(1 point)** Split the dataset on train and validate:

In [6]:
# make a dataset with encoded pairs:
class EngRusDataset(Dataset):
    def __init__(self, pairs: List[tuple], pad_to: int = None, pad_value: int = 0, 
                 pad_left: bool = False) -> None:
        self.pairs = pairs
        self.pad_to = pad_to
        self.pad_value = pad_value
        self.pad_left = pad_left
    
    def __len__(self) -> int:
        return len(self.pairs)
    
    def transform(self, seq: list) -> torch.tensor:
        if self.pad_to is not None and len(seq) != self.pad_to:
            if len(seq) > self.pad_to:
                seq = seq[:self.pad_to]
            else:
                n_pads = self.pad_to - len(seq)
                seq = [self.pad_value]*n_pads*self.pad_left + \
                      seq + \
                      [self.pad_value]*n_pads*(not self.pad_left)       
        return torch.tensor(seq, dtype=torch.int)
        
    def __getitem__(self, item: int) -> Dict:
        eng, rus = self.pairs[item]
        eng, rus = self.transform(seq=eng), self.transform(seq=rus)
        return dict(
            eng=eng,
            rus=rus,
        )

encoded = []
for eng, rus in tqdm(pairs):
    a = sentence2idx(eng, eng2idx)
    b = sentence2idx(rus, rus2idx)
    encoded.append((a, b))


train_pairs, eval_pairs = train_test_split(encoded, train_size=0.8, random_state=42, shuffle=True)
trainset = EngRusDataset(pairs=train_pairs, pad_to=8, pad_left=True)
evalset = EngRusDataset(pairs=eval_pairs, pad_to=8, pad_left=True)

print("Train size:", trainset.__len__(), "\nEval size:", evalset.__len__())

HBox(children=(FloatProgress(value=0.0, max=336666.0), HTML(value='')))


Train size: 269332 
Eval size: 67334


Let's build a naive DataLoader and check how it makes batches:

In [7]:
trainloader = DataLoader(trainset, batch_size=8, shuffle=True)
it = iter(trainloader)

In [108]:
batch = next(it)['eng']
batch

tensor([[   0,    1,    5,  140,   95,  381,    4,    2],
        [   1,  131,  154,  476, 3074,   48,  491,  175],
        [   0,    1,   22,   11,  472,  297,    4,    2],
        [   1,    5,   41,    5,   49,  111,   13,    4],
        [   1,    6,   86,   11,   29,   23,   28,  319],
        [   1,    6,  255,   19,  311,   12,  547,    4],
        [   0,    1,    5,  222,   11,  911,    4,    2],
        [   1,    5,   22,   11,  259,   45,   63,   91]], dtype=torch.int32)

In my case, the result was:
```
[tensor([1, 1, 1, 1, 1, 1, 1, 1]),
 tensor([ 6,  7,  6, 15,  5,  6,  5, 62]),
 tensor([ 48,  34,  83,   7,  32, 221,  22,  43]),
 tensor([  5, 143,  37,  36, 129,  12,  11,  66]),
 tensor([  73, 1258,  279,    8,    6,  555,   41,   10]),
 tensor([  8, 140,   8, 628,  20,  96,  13, 270]),
 tensor([  47,    4,   15,   18,   55,  269,    6, 1287]),
 tensor([ 58,   2,  13, 140, 193, 140, 171, 140])]
```

What's weird here?
1. This is not a tensor, but a list of tensors. Accordingly, when iterating over zero dimension (`batch[i, :]`), we will get not an i-example, but i-tokens for all examples in the batch. This is not a problem, but different from the expected behavior.
2. Only one example ends with `<EOS>` (2), the others are cut off to match its length. And this is a problem.

We would like to padd all examples to the maximum length in the batch.
But at the stage of preparing the example (in the `__getitem__` function), we do not know the batch neighbors!
In order to change the batch merging logic, we need to write our own `collate_fn` function in the DataLoader constructor:

```
def collate_fn(samples):
    # samples -- list of dictionaries samples
    <...>
    return batch
```

**(1 point)** Write a `collate_fn` function that padds _correctly_ rus and eng sequences and merges them into batches, where `batch[i, :]` returns the tokens for the `i` example.

Expected output (for a sequence with left padding):

```
tensor([[   1,   10, 3429,  405,  113,  676,   10, 1031,  140,    4,    2],
        [   0,    1,   57,   18,   23,   19,   61,    7,  140,    4,    2],
        [   0,    0,    0,    1,   16,   17, 1131,  416,  140,    4,    2],
        [   0,    0,    0,    1,   13,  465,   75,  197,  140,    4,    2],
        [   0,    0,    0,    1,    6,  302,   13,  144,  140,    4,    2],
        [   0,    1,    6,   59,  205,  167,    8,   15,  140,    4,    2],
        [   0,    0,    0,    0,    1,    6,   14,  678,  140,    4,    2],
        [   0,    0,    1,    5,   29,   67,    6,   14,  140,    4,    2]])
```

In [8]:
class EngRusCollate:
    def __init__(self, padding_value: int = 0, batch_first: bool = True) -> None:
        self.padding_value = padding_value
        self.batch_first = batch_first
        
    def __call__(self, batch: List[dict]) -> Dict[str, torch.tensor]:
        # getting max length
        data = dict(
                eng=list(),
                rus=list()
            )
        
        for seq in batch:
            data["eng"].append(torch.flip(seq.get("eng"), dims=(0, )))
            data["rus"].append(seq.get("rus"))
        return {k: torch.flip(pad_sequence(v, batch_first=self.batch_first, 
                                           padding_value=self.padding_value), dims=(1, )) \
                if k =="eng" else \
                   pad_sequence(v, batch_first=self.batch_first, 
                                padding_value=self.padding_value).to(torch.int64) for k, v in data.items()}
    
    
trainset = EngRusDataset(pairs=train_pairs)
evalset = EngRusDataset(pairs=eval_pairs)

trainloader = DataLoader(trainset, batch_size=8, shuffle=True, collate_fn=EngRusCollate())
evalloader = DataLoader(evalset, batch_size=8, shuffle=False, collate_fn=EngRusCollate())

print(len(trainloader), len(evalloader))

33667 8417


In [70]:
eng, rus = next(iter(trainloader)).values()

In [292]:
eng

tensor([[   0,    0,    0,    0,    0,    1,    7,   34,  227, 4960,    4,    2],
        [   0,    0,    0,    1,    5,  360,   56,    6,   20,  725,    4,    2],
        [   0,    1,    5,   22,   11,   41,    5,   72,    8,   47,    4,    2],
        [   0,    0,    0,    0,    1,    6,   59,  262,   66,  187,    4,    2],
        [   1,   15,    7,   36,    8,   93,   23,    5,   51,  100,    9,    2],
        [   0,    0,    1,   16,  388,   33, 1471,   85,   94,  303,    4,    2],
        [   1,    5,  139,    6,   74,   19,   73,  233,   44,  123,    4,    2],
        [   0,    0,    1,    6,   60,  126,   21,   12,  330,  370,    4,    2]],
       dtype=torch.int32)

In [360]:
for idx in range(eng.shape[0]):
    sentence = eng[idx, :].tolist()
    print(idx2sentence(seq=sentence, idx2tok=idx2eng))

PAD PAD PAD PAD PAD SOS you are two faced . EOS
PAD PAD PAD SOS i wonder why tom was fired . EOS
PAD SOS i don t think i need to go . EOS
PAD PAD PAD PAD SOS tom has lost all hope . EOS
SOS do you want to see what i ve got ? EOS
PAD PAD SOS it seems my dreams never come true . EOS
SOS i asked tom where he had bought his car . EOS
PAD PAD SOS tom will leave in a few days . EOS


In [293]:
rus

tensor([[   1,   12,    3,    4,    2,    0,    0,    0,    0,    0,    0],
        [   1,  282,   45,   21, 1381,    4,    2,    0,    0,    0,    0],
        [   1,    6,   40,    9,   15,   64,  157,    4,    2,    0,    0],
        [   1,    7,  364, 8131, 2823,    4,    2,    0,    0,    0,    0],
        [   1,  116, 2327,    9,   16,   22,   42,    8,    2,    0,    0],
        [   1,  296,   79, 2885,   59,    6,    3,    4,    2,    0,    0],
        [   1,    5,  295,   16,  313,   63,   18,  177,  178,    4,    2],
        [   1,    7, 2987,  263,  193,  690,    4,    2,    0,    0,    0]],
       dtype=torch.int32)

In [359]:
for idx in range(rus.shape[0]):
    sentence = rus[idx, :].tolist()
    print(idx2sentence(seq=sentence, idx2tok=idx2rus))

SOS ты UNK . EOS PAD PAD PAD PAD PAD PAD
SOS интересно, почему тома уволили . EOS PAD PAD PAD PAD
SOS не думаю, что мне нужно идти . EOS PAD PAD
SOS том потерял всякую надежду . EOS PAD PAD PAD PAD
SOS хочешь посмотреть, что у меня есть ? EOS PAD PAD
SOS похоже, мои мечты никогда не UNK . EOS PAD PAD
SOS я спросил у тома, где он купил машину . EOS
SOS том уедет через несколько днеи . EOS PAD PAD PAD


Now we have the correct data generator, and all we have to do is write the model (encoder and decoder).


### Encoder

The input tensor contains integers and has dimensions `[bs, seq_len]`,

We will pass them through the layer with embeddings and get the tensor `[bs, seq_len, dim]`. Now we have floating point numbers that can be fed to RNN layers as input.



GRU is an RNN with a specific structure:
<img src="https://habrastorage.org/webt/xt/_q/nj/xt_qnjgfjengqoqd4gizkq4j_wk.png">

In the picture, the yellow rectangles are the line layers with the corresponding activation functions.


`nn.RNN` allows you to create and use multi-layer one- and two-way layers as one layer.
All parameters must be specified during creation, and then simply applied during the forward pass.


The order of dimensions is a bit different from the usual in convolutional networks, this is due to the inability to parallel recurrent calculations effectively.


**batch_first=True**

Such an RNN layer expects two tensors as input:
  - input with sizes `[bs, seq_len, dim]`,
  - hidden_state with dimensions `[num_layers * num_directions, bs, hidden_size]`.
 
 
The output is two tensors:
- output `[bs, seq_len, dim]`,
- hidden `[num_layers * num_directions, bs, hidden]`.

We will apply RNN in two ways:
- to the entire sequence, to translate the entire phrase in one language into one vector (EncoderRNN)
- to one tensor and input token to generate a phrase in another language (DecoderRNN)


We will put the entire input sequence into a hidden state vector.

In [9]:
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size: int, vocab_size: int, layers: int = 1) -> None:
        super().__init__()
        self.layers = layers
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        
        # initialize the embeddings with "hidden_size" size for each token in the vocab
        # each token initialized from standard normal distribution N(0, 1)
        self.embeddings = nn.Embedding(num_embeddings=vocab_size, embedding_dim=hidden_size)
        
        # initialize RNN model with "layers" num layers of GRU cells. "Bidirectional" argument is not
        # specified -> the model is one directional
        self.rnn = nn.GRU(input_size=hidden_size, hidden_size=hidden_size, num_layers=layers, 
                          batch_first=True)
        
    def forward(self, input_: torch.tensor, hidden: torch.tensor) -> Tuple[torch.tensor]:
        # getting tensor of shape [batch_size, n_sequences, embedings size]
        # it's X data to feed to GRU cell
        embedded = self.embeddings(input_)
        
        # feed through rnn embedings and hidden
        output, hidden = self.rnn(embedded, hidden)
        return output, hidden
    
    def init_hidden(self, batch_size: int = 1, device: str = None) -> torch.tensor:
        """
        Method initializes the first hidden vector with shape (D∗num_layers, N, H) to feed to first GRU cell.
        D = 2 if bidirectional otherwise 1, N - batch size, H - hidden size.
        All values are equal to 0
        """
        # be aware about dimension! https://pytorch.org/docs/stable/nn.html#torch.nn.GRU
        return torch.zeros(self.layers, batch_size, self.hidden_size, device=device)

# initialize the encoder with hidden size 256
enc = EncoderRNN(hidden_size=256, vocab_size=len(eng2idx))

x = next(iter(trainloader))["eng"]
print(x.shape)

# first hidden vector is a vector of zeroes
# so, initialize this vector
hidden = enc.init_hidden(batch_size=x.shape[0])
output, hidden = enc(x, hidden)
print(output.shape, hidden.shape)

torch.Size([8, 11])
torch.Size([8, 11, 256]) torch.Size([1, 8, 256])


We want the decoder to generate a translation for us -- a sequence of tokens from another language, using the encoder's hidden state vector.

To do this, we will supply hidden and `<SOS>`token to the input.
At each step, the decoder will return hidden and output vector.
Output vector is the probability distribution for the next token (respectively, it has the size of the output language dictionary).

In [10]:
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size: int, vocab_size: int, layers: int = 1, p_classifier: float = 0.1) -> None:
        super().__init__()
        self.layers = layers
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        
        # initialize the embeddings with "hidden_size" size for each token in the vocab
        # each token initialized from standard normal distribution N(0, 1)
        self.embeddings = nn.Embedding(num_embeddings=vocab_size, embedding_dim=hidden_size)
        
        # initialize onedirectional RNN of decoder with GRU cells
        self.rnn = nn.GRU(input_size=hidden_size, hidden_size=hidden_size, num_layers=layers, 
                          batch_first=True)
        
        # finally we have to classify our tokens with Linear layer at the end
        self.classifier = nn.Sequential(
            nn.Dropout(p=p_classifier),
            nn.Linear(in_features=hidden_size, out_features=vocab_size)
        )
        self.softmax = nn.LogSoftmax(dim=2)
        
    def forward(self, input_: torch.tensor, hidden: torch.tensor) -> Tuple[torch.tensor]:
        embedded = self.embeddings(input_)
        output, hidden = self.rnn(embedded, hidden)
        output = self.softmax(self.classifier(output))
        return output, hidden
    
    def init_hidden(self, batch_size: int = 1, device: str = None) -> torch.tensor:
        """
        Method initializes the first hidden vector with shape (D∗num_layers, N, H) to feed to first GRU cell.
        D = 2 if bidirectional otherwise 1, N - batch size, H - hidden size.
        All values are equal to 0
        """
        # be aware about dimension! https://pytorch.org/docs/stable/nn.html#torch.nn.GRU
        return torch.zeros(self.layers, batch_size, self.hidden_size, device=device)


In [73]:
dec = DecoderRNN(hidden_size=256, vocab_size=len(rus2idx))

Let's get a tensor with tokens of size `[bs, seq_len]` from the data generator and try to iterate over seq_len to generate the next token.

In [74]:
batch = next(iter(trainloader))["rus"] # get batch
bs, seq_len = batch.shape
hidden = dec.init_hidden(batch_size=bs)

for i in range(0, seq_len):
    step = batch[:, i].unsqueeze(1)  # get tokens sample for i-th step 
     # These are the correct tokens (ground truth), we could generate them
     # unsqueeze adds dimension 1 (from [bs] to [bs, 1])
    
    output, hidden = dec(step, hidden)
    print(output.shape, hidden.shape)
    # output -- this is the probability distribution for the next token
    # hidden -- this is the updated hidden state

torch.Size([8, 1, 10000]) torch.Size([1, 8, 256])
torch.Size([8, 1, 10000]) torch.Size([1, 8, 256])
torch.Size([8, 1, 10000]) torch.Size([1, 8, 256])
torch.Size([8, 1, 10000]) torch.Size([1, 8, 256])
torch.Size([8, 1, 10000]) torch.Size([1, 8, 256])
torch.Size([8, 1, 10000]) torch.Size([1, 8, 256])
torch.Size([8, 1, 10000]) torch.Size([1, 8, 256])
torch.Size([8, 1, 10000]) torch.Size([1, 8, 256])
torch.Size([8, 1, 10000]) torch.Size([1, 8, 256])


**(6 points)** Fill in a training part and train the encoder and decoder.

1. You need to write getting the next token (integer) from the distribution: a vector of size `len(rus2idx)`. Since we are working in batches, this should be a batchified operation. You have several options for how to do this:
 - take by argmax
 - sample from distribution (torch.multinomial)
 - during training, take tokens from ground truth (and this must be done at least sometimes so that the model converges).
 
2. You need to write a loss calculation. It is convenient to do this at each step: after the `<EOS>` occurs in the example, you do not need to count the loss for it (in the vectorized version, you can multiply the loss for `<PAD>`-tokens by zero - this is called masking). Loss is simply the sum of cross-entropy losses for each step.


In [12]:
logging.basicConfig(
    level=logging.INFO, 
    format='[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
)
LOGGER = logging.getLogger(__name__)


class Eng2RusTrainModel:
    def __init__(self, encoder_vocab_size: int, decoder_vocab_size: int, hiden_size: int = 256, 
                 device: str = "cuda", initial_lr: Union[int, float] = 1e-3, patience: int = 10,
                 factor: float = 0.5, writer_path: str = "./runs/", max_gradient_norm: int = 4) -> None:
        self.device = device if torch.cuda.is_available() else "cpu"
        
        # model initialization
        self.encoder = EncoderRNN(hidden_size=hiden_size, vocab_size=encoder_vocab_size).to(self.device)
        self.decoder = DecoderRNN(hidden_size=hiden_size, vocab_size=decoder_vocab_size).to(self.device)
        
        # loss function
        self.criterion = nn.CrossEntropyLoss(reduction="none")
        
        # optimizer
        training_params = list(self.encoder.parameters()) + list(self.decoder.parameters())
        self.optimizer = \
            torch.optim.Adam(params=training_params, lr=initial_lr)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer=self.optimizer, mode="min", patience=patience, factor=factor)
        self.max_gradient_norm = max_gradient_norm
        
        # summary writer to display using tendorboard
        self.writer = SummaryWriter(writer_path)
    
    def fit(self, trainloader: DataLoader, evalloader: DataLoader, n_epochs: int = 5, 
            teacher_forcing_ratio: float = 0.5, verbose_step: int = 100, pad_token: int = 0) -> None:
        LOGGER.info("Iterate over {} epochs".format(n_epochs))
        for epoch in range(n_epochs):
            LOGGER.info("Epoch [{}]/[{}]".format(epoch + 1, N_EPOCHS))
            
            curr_lr = self.scheduler.optimizer.param_groups[0]['lr']
            LOGGER.info("LR:\t{}".format(curr_lr))
            if self.writer is not None:
                self.writer.add_scalar(tag="Learning rate", scalar_value=curr_lr, global_step=epoch)
                
            train_loss_avg = self.train_nn(
                trainloader=trainloader, teacher_forcing_ratio=teacher_forcing_ratio, 
                verbose_step=verbose_step, pad_token=pad_token)
            
            with torch.no_grad():
                eval_loss_avg = self.eval_nn(
                    evalloader=evalloader, verbose_step=verbose_step, pad_token=pad_token)
            
            if self.writer is not None:
                self.writer.add_scalar(tag=os.path.join("Loss", "Train"), 
                                       scalar_value=train_loss_avg, global_step=epoch)
                self.writer.add_scalar(tag=os.path.join("Loss", "Eval"), 
                                       scalar_value=eval_loss_avg, global_step=epoch)

            LOGGER.info("Train loss avg:\t{:.6f}".format(train_loss_avg))
            LOGGER.info("Eval loss avg:\t{:.6f}".format(eval_loss_avg))
            
            self.scheduler.step(eval_loss_avg)
            
            # clean up cache
            if self.device == "cuda":
                LOGGER.info("Clean up CUDA memory") 
                torch.cuda.empty_cache()
                gc.collect()
        return
    
    def train_nn(self, trainloader: DataLoader, teacher_forcing_ratio: float = 0.5, 
                 verbose_step: int = 100, pad_token: int = 0) -> float:
        LOGGER.info("Train loop")
        self.encoder.train()
        self.decoder.train()
        
        running_loss = list()
        for i, batch in tqdm(enumerate(trainloader), total=len(trainloader)):
            eng = batch['eng'].to(self.device)
            rus = batch['rus'].to(self.device)
        
            # rus sequence length and batch size
            batch_size, rus_seq_length = rus.shape

            encoder_hidden = \
                self.encoder.init_hidden(batch_size=batch_size, device=self.device)
            _, hidden = self.encoder(eng, encoder_hidden)

            # write decoder application and loss calculation.
            # hint: loss must be masked, in case the sequence has already ended.

            # the first token for the decoder input is <SOS>
            decoder_input = rus[:, 0].unsqueeze(1)

            loss = 0
            for token in range(1, rus_seq_length):
                # forward single decoder GRU cell
                decoder_outputs, hidden = self.decoder(decoder_input, hidden)

                # now we have to decide, which token should we use for the next GRU cell of the decoder:
                # token from target rus sequence or output from the previous GRU cell
                teacher_force = random.random() < teacher_forcing_ratio
                decoder_input = rus[:, token].unsqueeze(1) if teacher_force else decoder_outputs.argmax(2)

                curr_loss = self.criterion(decoder_outputs.view(batch_size, -1), rus[:, token])
                curr_loss = torch.masked_select(curr_loss, rus[:, token] != pad_token).mean()
                loss += curr_loss.mean()

            loss = loss / (rus_seq_length - 1)
            
            # calculate gradients
            loss.backward()
            
            # clip gradients and update weights
            torch.nn.utils.clip_grad_norm_(
                self.decoder.parameters(), max_norm=self.max_gradient_norm)
            torch.nn.utils.clip_grad_norm_(
                self.encoder.parameters(), max_norm=self.max_gradient_norm)
            self.optimizer.step()
            
            # set gradients to zero
            self.optimizer.zero_grad()

            loss = loss.cpu().item()
            running_loss.append(loss)
        
            if verbose_step is not None and not i % verbose_step:
                LOGGER.info("Iteration [{}]/[{}] loss: {:.6f}".format(i, len(trainloader), loss))
        return np.mean(running_loss)
    
    def eval_nn(self, evalloader: DataLoader, verbose_step: int = 100, pad_token: int = 0) -> float:
        LOGGER.info("Eval loop")
        self.encoder.eval()
        self.decoder.eval()
        
        running_loss = list()
        for i, batch in tqdm(enumerate(evalloader), total=len(evalloader)):
            eng = batch['eng'].to(self.device)
            rus = batch['rus'].to(self.device)

            # rus sequence length and batch size
            batch_size, rus_seq_length = rus.shape

            encoder_hidden = self.encoder.init_hidden(batch_size=batch_size, device=self.device)
            _, hidden = self.encoder(eng, encoder_hidden)

            # the first token for the decoder input is <SOS>
            decoder_input = rus[:, 0].unsqueeze(1)

            loss = 0
            for token in range(1, rus_seq_length):
                # forward single decoder GRU cell
                decoder_outputs, hidden = self.decoder(decoder_input, hidden)
                decoder_input = decoder_outputs.argmax(2)

                curr_loss = self.criterion(decoder_outputs.view(batch_size, -1), rus[:, token])
                curr_loss = torch.masked_select(curr_loss, rus[:, token] != pad_token).mean()
                loss += curr_loss.mean()

            loss = loss / (rus_seq_length - 1)
            loss = loss.cpu().item()
            running_loss.append(loss)

            if verbose_step is not None and not i % verbose_step:
                LOGGER.info("Iteration [{}]/[{}] loss: {:.6f}".format(i, len(evalloader), loss))
                
        return np.mean(running_loss)


In [114]:
# launch TensorBoard
# tensorboard.exe --logdir ./runs/

In [13]:
LR = 1e-3
BATCH_SIZE = 128
HIDEN_SIZE = 256
N_EPOCHS = 120
PATIENCE = 5
FACTOR = 0.5
WRITER_PATH = "./runs/"
MAX_NORM = 4.0
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# initialize DataLoaders
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=EngRusCollate())
evalloader = DataLoader(evalset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=EngRusCollate())

en2ru_train = Eng2RusTrainModel(encoder_vocab_size=len(eng2idx), decoder_vocab_size=len(rus2idx), 
                                hiden_size=HIDEN_SIZE, device=DEVICE, initial_lr=LR, patience=PATIENCE,
                                factor=FACTOR, writer_path=WRITER_PATH, max_gradient_norm=MAX_NORM)
en2ru_train.fit(trainloader=trainloader, evalloader=evalloader, n_epochs=N_EPOCHS, teacher_forcing_ratio=0.5, 
                verbose_step=300, pad_token=0)

[2022-12-10 21:49:39,789] {<ipython-input-12-cb52601f4b8e>:34} INFO - Iterate over 120 epochs
[2022-12-10 21:49:39,790] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [1]/[120]
[2022-12-10 21:49:39,791] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-10 21:49:39,791] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-10 21:49:40,125] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 9.193022
[2022-12-10 21:50:12,842] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 3.566121
[2022-12-10 21:50:58,296] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 2.993036
[2022-12-10 21:53:01,367] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 3.214468
[2022-12-10 21:55:58,475] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 3.828656
[2022-12-10 21:59:04,128] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 2.853592
[2022-12-10 22:02:15,243] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 2.568382
[2022-12-10 22:05:19,448] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 2.614114
[2022-12-10 22:05:21,631] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-10 22:05:21,784] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 3.143227
[2022-12-10 22:06:30,677] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 3.201193
[2022-12-10 22:07:22,347] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	3.396350
[2022-12-10 22:07:22,348] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	3.444287
[2022-12-10 22:07:22,349] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-10 22:07:22,650] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [2]/[120]





[2022-12-10 22:07:22,651] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-10 22:07:22,652] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-10 22:07:23,282] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 2.957986
[2022-12-10 22:10:48,169] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 2.783006
[2022-12-10 22:14:13,024] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 2.398493
[2022-12-10 22:17:38,998] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 3.052244
[2022-12-10 22:21:04,943] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 2.526034
[2022-12-10 22:24:28,481] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 2.343269
[2022-12-10 22:27:50,903] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 2.453805
[2022-12-10 22:31:15,232] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 2.146279
[2022-12-10 22:31:17,992] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-10 22:31:18,207] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.931069
[2022-12-10 22:32:27,190] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.847935
[2022-12-10 22:33:25,740] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	2.738093
[2022-12-10 22:33:25,742] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	3.130514
[2022-12-10 22:33:25,742] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-10 22:33:26,095] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [3]/[120]
[2022-12-10 22:33:26,096] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-10 22:33:26,097] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-10 22:33:26,382] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 3.306774
[2022-12-10 22:36:49,987] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.802398
[2022-12-10 22:40:15,031] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 2.516272
[2022-12-10 22:43:41,850] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 2.226439
[2022-12-10 22:47:06,721] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 3.339954
[2022-12-10 22:50:35,495] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 2.125281
[2022-12-10 22:54:01,948] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 2.383549
[2022-12-10 22:57:27,636] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 2.104862
[2022-12-10 22:57:29,700] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-10 22:57:29,919] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.660306
[2022-12-10 22:58:37,536] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.730677
[2022-12-10 22:59:28,948] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	2.403928
[2022-12-10 22:59:28,950] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	3.011295
[2022-12-10 22:59:28,950] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-10 22:59:29,265] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [4]/[120]





[2022-12-10 22:59:29,267] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-10 22:59:29,267] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-10 22:59:30,337] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 2.556731
[2022-12-10 23:02:52,141] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 2.103615
[2022-12-10 23:06:15,866] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 2.098202
[2022-12-10 23:09:41,571] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 2.303406
[2022-12-10 23:13:09,803] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.593078
[2022-12-10 23:16:37,970] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 2.644880
[2022-12-10 23:20:02,589] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 2.375202
[2022-12-10 23:23:33,596] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 2.120414
[2022-12-10 23:23:35,808] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-10 23:23:36,029] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.664005
[2022-12-10 23:24:46,572] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.633854
[2022-12-10 23:25:41,065] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	2.161750
[2022-12-10 23:25:41,066] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.826742
[2022-12-10 23:25:41,067] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-10 23:25:41,361] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [5]/[120]





[2022-12-10 23:25:41,362] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-10 23:25:41,363] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-10 23:25:41,928] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 3.049371
[2022-12-10 23:29:00,732] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.798503
[2022-12-10 23:32:27,210] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.558736
[2022-12-10 23:35:53,276] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.853998
[2022-12-10 23:39:21,368] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 2.052909
[2022-12-10 23:42:47,054] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.807586
[2022-12-10 23:46:13,546] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.938791
[2022-12-10 23:49:42,706] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.657928
[2022-12-10 23:49:44,843] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-10 23:49:45,060] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.473392
[2022-12-10 23:51:02,150] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.601178
[2022-12-10 23:51:53,207] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.993636
[2022-12-10 23:51:53,208] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.734074
[2022-12-10 23:51:53,208] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-10 23:51:53,515] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [6]/[120]





[2022-12-10 23:51:53,517] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-10 23:51:53,518] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-10 23:51:53,717] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 2.096619
[2022-12-10 23:55:13,267] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.488062
[2022-12-10 23:58:44,218] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.641966
[2022-12-11 00:02:11,855] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.841634
[2022-12-11 00:05:40,808] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 2.602458
[2022-12-11 00:09:08,130] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.754443
[2022-12-11 00:12:33,727] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.724612
[2022-12-11 00:16:06,264] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 2.120424
[2022-12-11 00:16:08,818] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 00:16:09,038] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.626903
[2022-12-11 00:17:20,354] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.213931
[2022-12-11 00:18:12,706] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.874742
[2022-12-11 00:18:12,707] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.702652
[2022-12-11 00:18:12,708] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 00:18:13,047] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [7]/[120]





[2022-12-11 00:18:13,050] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 00:18:13,051] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 00:18:13,246] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.937594
[2022-12-11 00:21:41,057] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.779027
[2022-12-11 00:25:07,891] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.723048
[2022-12-11 00:28:36,707] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.282424
[2022-12-11 00:31:58,472] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.635010
[2022-12-11 00:35:30,127] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.685915
[2022-12-11 00:39:00,742] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.751010
[2022-12-11 00:42:26,364] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.711712
[2022-12-11 00:42:28,558] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 00:42:28,771] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.372573
[2022-12-11 00:43:40,949] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.639987
[2022-12-11 00:44:34,652] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.785156
[2022-12-11 00:44:34,655] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.664162
[2022-12-11 00:44:34,657] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 00:44:35,114] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [8]/[120]
[2022-12-11 00:44:35,116] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 00:44:35,118] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 00:44:35,571] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.391991
[2022-12-11 00:47:11,488] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.561858
[2022-12-11 00:47:57,009] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.864769
[2022-12-11 00:48:39,314] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.734185
[2022-12-11 00:49:18,705] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.508582
[2022-12-11 00:49:59,488] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.769118
[2022-12-11 00:50:40,495] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.777888
[2022-12-11 00:51:22,868] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.799528
[2022-12-11 00:51:23,596] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 00:51:23,693] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.431519
[2022-12-11 00:51:40,821] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.533747
[2022-12-11 00:51:53,010] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.708475
[2022-12-11 00:51:53,011] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.642277
[2022-12-11 00:51:53,013] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 00:51:53,330] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [9]/[120]
[2022-12-11 00:51:53,331] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 00:51:53,332] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 00:51:53,528] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.801837
[2022-12-11 00:52:33,969] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.359244
[2022-12-11 00:53:14,823] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.520880
[2022-12-11 00:53:56,047] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.350624
[2022-12-11 00:54:34,198] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.548536
[2022-12-11 00:55:14,928] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.304277
[2022-12-11 00:55:55,350] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.602396
[2022-12-11 00:56:35,481] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.476448
[2022-12-11 00:56:35,954] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 00:56:36,009] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.315288
[2022-12-11 00:56:49,842] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.384516
[2022-12-11 00:57:00,265] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.657217
[2022-12-11 00:57:00,266] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.615952
[2022-12-11 00:57:00,266] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 00:57:00,507] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [10]/[120]





[2022-12-11 00:57:00,508] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 00:57:00,509] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 00:57:00,713] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.553112
[2022-12-11 00:57:41,072] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.639033
[2022-12-11 00:58:21,887] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.421736
[2022-12-11 00:59:02,511] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.278541
[2022-12-11 00:59:41,947] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.262995
[2022-12-11 01:00:21,826] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.351040
[2022-12-11 01:01:02,056] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 2.366413
[2022-12-11 01:01:41,484] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.597169
[2022-12-11 01:01:41,913] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:01:41,975] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.337054
[2022-12-11 01:01:55,473] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.167356
[2022-12-11 01:02:05,625] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.613485
[2022-12-11 01:02:05,625] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.651262
[2022-12-11 01:02:05,626] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 01:02:05,868] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [11]/[120]
[2022-12-11 01:02:05,869] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:02:05,869] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:02:06,038] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.306818
[2022-12-11 01:02:46,680] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.227236
[2022-12-11 01:03:26,831] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.214525
[2022-12-11 01:04:07,234] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 2.109560
[2022-12-11 01:04:47,201] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.596836
[2022-12-11 01:05:27,435] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.477027
[2022-12-11 01:06:07,082] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 2.190516
[2022-12-11 01:06:45,225] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.342783
[2022-12-11 01:06:45,727] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:06:45,787] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.414967
[2022-12-11 01:07:00,245] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.192662
[2022-12-11 01:07:09,908] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.590595
[2022-12-11 01:07:09,909] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.607297
[2022-12-11 01:07:09,910] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 01:07:10,153] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [12]/[120]
[2022-12-11 01:07:10,154] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:07:10,154] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:07:10,328] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.211694
[2022-12-11 01:07:50,314] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.812047
[2022-12-11 01:08:30,486] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.771626
[2022-12-11 01:09:10,480] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.040587
[2022-12-11 01:09:50,450] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.899972
[2022-12-11 01:10:28,847] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.614372
[2022-12-11 01:11:08,530] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 2.317795
[2022-12-11 01:11:47,499] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.377972
[2022-12-11 01:11:47,892] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:11:47,959] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.364900
[2022-12-11 01:12:01,284] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.478661
[2022-12-11 01:12:11,265] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.551632
[2022-12-11 01:12:11,266] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.592271
[2022-12-11 01:12:11,267] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 01:12:11,506] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [13]/[120]





[2022-12-11 01:12:11,508] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:12:11,508] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:12:11,690] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.315200
[2022-12-11 01:12:50,314] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.428755
[2022-12-11 01:13:31,268] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.774543
[2022-12-11 01:14:09,959] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.557385
[2022-12-11 01:14:49,199] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.259615
[2022-12-11 01:15:27,726] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.560637
[2022-12-11 01:16:06,365] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 2.543410
[2022-12-11 01:16:46,763] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.473948
[2022-12-11 01:16:47,269] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:16:47,326] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.261201
[2022-12-11 01:17:01,192] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.461459
[2022-12-11 01:17:11,579] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.523542
[2022-12-11 01:17:11,580] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.632159
[2022-12-11 01:17:11,581] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 01:17:11,821] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [14]/[120]
[2022-12-11 01:17:11,822] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:17:11,823] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:17:11,998] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.594944
[2022-12-11 01:17:51,210] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.344748
[2022-12-11 01:18:31,314] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.853847
[2022-12-11 01:19:09,989] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.610380
[2022-12-11 01:19:48,685] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.460226
[2022-12-11 01:20:28,722] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.641469
[2022-12-11 01:21:08,166] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.766043
[2022-12-11 01:21:48,992] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.543581
[2022-12-11 01:21:49,503] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:21:49,565] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.270889
[2022-12-11 01:22:03,507] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.295966
[2022-12-11 01:22:13,962] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.502937
[2022-12-11 01:22:13,963] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.586131
[2022-12-11 01:22:13,964] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 01:22:14,207] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [15]/[120]
[2022-12-11 01:22:14,208] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:22:14,208] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:22:14,388] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.605608
[2022-12-11 01:22:54,210] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.678601
[2022-12-11 01:23:34,734] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.429526
[2022-12-11 01:24:13,694] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.140280
[2022-12-11 01:24:53,774] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.365701
[2022-12-11 01:25:32,644] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.335904
[2022-12-11 01:26:13,025] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.403955
[2022-12-11 01:26:52,029] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.911378
[2022-12-11 01:26:52,561] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:26:52,622] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.423006
[2022-12-11 01:27:06,120] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.388034
[2022-12-11 01:27:16,277] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.484352
[2022-12-11 01:27:16,278] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.582901
[2022-12-11 01:27:16,279] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 01:27:16,519] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [16]/[120]





[2022-12-11 01:27:16,520] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:27:16,521] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:27:16,687] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.409103
[2022-12-11 01:27:56,073] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.638005
[2022-12-11 01:28:34,552] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.252678
[2022-12-11 01:29:13,978] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.408012
[2022-12-11 01:29:53,356] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.404480
[2022-12-11 01:30:32,727] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.562197
[2022-12-11 01:31:12,025] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.001658
[2022-12-11 01:31:50,882] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.294389
[2022-12-11 01:31:51,337] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:31:51,395] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.319527
[2022-12-11 01:32:07,045] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.436180
[2022-12-11 01:32:17,463] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.471364
[2022-12-11 01:32:17,464] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.598970
[2022-12-11 01:32:17,464] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 01:32:17,705] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [17]/[120]





[2022-12-11 01:32:17,706] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:32:17,707] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:32:17,898] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.596575
[2022-12-11 01:32:57,248] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.191672
[2022-12-11 01:33:36,809] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.416623
[2022-12-11 01:34:15,978] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 2.062774
[2022-12-11 01:34:55,351] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.389346
[2022-12-11 01:35:35,402] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.422067
[2022-12-11 01:36:14,874] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.285107
[2022-12-11 01:36:53,894] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.404183
[2022-12-11 01:36:54,334] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:36:54,397] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.483084
[2022-12-11 01:37:08,020] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.059667
[2022-12-11 01:37:18,206] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.451268
[2022-12-11 01:37:18,207] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.575196
[2022-12-11 01:37:18,207] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 01:37:18,446] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [18]/[120]
[2022-12-11 01:37:18,446] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:37:18,447] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:37:18,629] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.604149
[2022-12-11 01:37:57,711] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.282533
[2022-12-11 01:38:37,077] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.699183
[2022-12-11 01:39:16,859] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.417472
[2022-12-11 01:39:56,406] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.344495
[2022-12-11 01:40:35,435] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.524457
[2022-12-11 01:41:15,037] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.438177
[2022-12-11 01:41:54,547] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.683431
[2022-12-11 01:41:55,032] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:41:55,090] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.235815
[2022-12-11 01:42:08,755] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.446272
[2022-12-11 01:42:18,986] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.442631
[2022-12-11 01:42:18,987] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.584950
[2022-12-11 01:42:18,988] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 01:42:19,230] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [19]/[120]





[2022-12-11 01:42:19,231] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:42:19,232] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:42:19,420] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.570104
[2022-12-11 01:42:59,927] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.893714
[2022-12-11 01:43:39,578] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.509722
[2022-12-11 01:44:18,663] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.313069
[2022-12-11 01:44:57,304] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.214359
[2022-12-11 01:45:36,018] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.585852
[2022-12-11 01:46:14,691] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.130800
[2022-12-11 01:46:54,190] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.401345
[2022-12-11 01:46:54,698] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:46:54,760] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.177183
[2022-12-11 01:47:08,234] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.477173
[2022-12-11 01:47:18,355] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.408515
[2022-12-11 01:47:18,356] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.627100
[2022-12-11 01:47:18,357] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 01:47:18,599] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [20]/[120]





[2022-12-11 01:47:18,600] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:47:18,600] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:47:18,760] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.498567
[2022-12-11 01:47:57,671] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.560640
[2022-12-11 01:48:36,460] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.404966
[2022-12-11 01:49:17,480] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.196653
[2022-12-11 01:49:57,484] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.464974
[2022-12-11 01:50:35,639] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.162130
[2022-12-11 01:51:14,559] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.259505
[2022-12-11 01:51:54,503] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.345005
[2022-12-11 01:51:54,939] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:51:55,008] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.178750
[2022-12-11 01:52:08,885] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.432219
[2022-12-11 01:52:19,305] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.409073
[2022-12-11 01:52:19,306] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.582002
[2022-12-11 01:52:19,306] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 01:52:19,547] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [21]/[120]





[2022-12-11 01:52:19,548] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:52:19,549] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:52:19,714] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.179723
[2022-12-11 01:52:56,938] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.083717
[2022-12-11 01:53:37,742] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.082178
[2022-12-11 01:54:16,634] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.438627
[2022-12-11 01:54:56,338] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 2.588721
[2022-12-11 01:55:35,618] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.154643
[2022-12-11 01:56:14,707] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.400144
[2022-12-11 01:56:54,248] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.706492
[2022-12-11 01:56:54,725] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 01:56:54,782] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.167106
[2022-12-11 01:57:08,343] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.350391
[2022-12-11 01:57:18,547] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.412127
[2022-12-11 01:57:18,548] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.602190
[2022-12-11 01:57:18,548] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 01:57:18,800] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [22]/[120]





[2022-12-11 01:57:18,802] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 01:57:18,802] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 01:57:18,977] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.362162
[2022-12-11 01:57:56,624] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.342539
[2022-12-11 01:58:36,487] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.423180
[2022-12-11 01:59:15,750] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.588413
[2022-12-11 01:59:55,502] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.783028
[2022-12-11 02:00:35,516] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.904181
[2022-12-11 02:01:15,270] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.398402
[2022-12-11 02:01:54,185] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.323868
[2022-12-11 02:01:54,602] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:01:54,658] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.304772
[2022-12-11 02:02:07,533] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.294784
[2022-12-11 02:02:17,569] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.395710
[2022-12-11 02:02:17,570] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.588275
[2022-12-11 02:02:17,570] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 02:02:17,811] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [23]/[120]
[2022-12-11 02:02:17,812] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.001
[2022-12-11 02:02:17,813] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:02:18,004] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.038571
[2022-12-11 02:02:55,313] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.965903
[2022-12-11 02:03:34,960] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.423985
[2022-12-11 02:04:14,782] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.370682
[2022-12-11 02:04:53,245] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.241837
[2022-12-11 02:05:31,492] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.251048
[2022-12-11 02:06:09,904] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.419608
[2022-12-11 02:06:48,787] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.290821
[2022-12-11 02:06:49,225] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:06:49,283] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.327659
[2022-12-11 02:07:02,578] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.290493
[2022-12-11 02:07:12,567] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.381918
[2022-12-11 02:07:12,568] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.605789
[2022-12-11 02:07:12,568] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 02:07:12,812] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [24]/[120]





[2022-12-11 02:07:12,813] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.0005
[2022-12-11 02:07:12,814] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:07:12,983] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.303022
[2022-12-11 02:07:52,030] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.358483
[2022-12-11 02:08:30,716] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.033942
[2022-12-11 02:09:09,419] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.216294
[2022-12-11 02:09:47,951] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.041135
[2022-12-11 02:10:26,055] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.936371
[2022-12-11 02:11:03,736] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.072970
[2022-12-11 02:11:41,515] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.358508
[2022-12-11 02:11:41,984] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:11:42,042] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.155402
[2022-12-11 02:11:55,392] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.183773
[2022-12-11 02:12:05,349] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.296171
[2022-12-11 02:12:05,350] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.556870
[2022-12-11 02:12:05,350] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 02:12:05,592] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [25]/[120]
[2022-12-11 02:12:05,593] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.0005
[2022-12-11 02:12:05,594] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:12:05,767] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.985668
[2022-12-11 02:12:44,191] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.477343
[2022-12-11 02:13:22,689] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.159065
[2022-12-11 02:14:00,697] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.149135
[2022-12-11 02:14:39,380] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.062301
[2022-12-11 02:15:17,632] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.607656
[2022-12-11 02:15:55,952] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.302979
[2022-12-11 02:16:36,839] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.421697
[2022-12-11 02:16:37,305] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:16:37,371] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.322327
[2022-12-11 02:16:50,714] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.252803
[2022-12-11 02:17:00,756] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.240702
[2022-12-11 02:17:00,757] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.501691
[2022-12-11 02:17:00,757] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 02:17:01,001] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [26]/[120]
[2022-12-11 02:17:01,002] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.0005
[2022-12-11 02:17:01,003] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:17:01,186] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.221127
[2022-12-11 02:17:39,807] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.134898
[2022-12-11 02:18:18,664] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.324447
[2022-12-11 02:18:57,701] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.124670
[2022-12-11 02:19:35,963] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.018839
[2022-12-11 02:20:14,353] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.179363
[2022-12-11 02:20:53,174] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.553538
[2022-12-11 02:21:31,417] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.904446
[2022-12-11 02:21:31,872] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:21:31,933] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.198484
[2022-12-11 02:21:45,304] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.244171
[2022-12-11 02:21:55,366] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.214319
[2022-12-11 02:21:55,366] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.534755
[2022-12-11 02:21:55,367] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 02:21:55,614] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [27]/[120]
[2022-12-11 02:21:55,615] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.0005
[2022-12-11 02:21:55,616] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:21:55,789] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.214909
[2022-12-11 02:22:34,743] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.023589
[2022-12-11 02:23:13,178] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.771807
[2022-12-11 02:23:52,411] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.315991
[2022-12-11 02:24:30,627] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.026558
[2022-12-11 02:25:09,576] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 2.045184
[2022-12-11 02:25:47,736] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.109131
[2022-12-11 02:26:25,717] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.106838
[2022-12-11 02:26:26,153] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:26:26,221] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.384172
[2022-12-11 02:26:39,550] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 1.920932
[2022-12-11 02:26:49,590] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.209727
[2022-12-11 02:26:49,591] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.521017
[2022-12-11 02:26:49,591] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 02:26:49,832] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [28]/[120]





[2022-12-11 02:26:49,833] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.0005
[2022-12-11 02:26:49,834] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:26:50,011] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.961452
[2022-12-11 02:27:28,550] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.952461
[2022-12-11 02:28:07,368] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.251218
[2022-12-11 02:28:46,150] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.234565
[2022-12-11 02:29:26,319] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.039017
[2022-12-11 02:30:05,374] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.110227
[2022-12-11 02:30:45,527] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.078489
[2022-12-11 02:31:24,621] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.022344
[2022-12-11 02:31:25,052] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:31:25,108] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.313233
[2022-12-11 02:31:38,805] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.089813
[2022-12-11 02:31:49,116] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.190095
[2022-12-11 02:31:49,116] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.510765
[2022-12-11 02:31:49,117] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 02:31:49,359] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [29]/[120]





[2022-12-11 02:31:49,360] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.0005
[2022-12-11 02:31:49,361] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:31:49,516] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.194464
[2022-12-11 02:32:28,952] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.371180
[2022-12-11 02:33:08,404] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.901806
[2022-12-11 02:33:47,990] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 2.117800
[2022-12-11 02:34:27,670] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.145003
[2022-12-11 02:35:06,313] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.271375
[2022-12-11 02:35:45,440] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.824311
[2022-12-11 02:36:25,575] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.524685
[2022-12-11 02:36:26,009] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:36:26,067] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.231961
[2022-12-11 02:36:39,789] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.160766
[2022-12-11 02:36:50,119] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.179819
[2022-12-11 02:36:50,120] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.509428
[2022-12-11 02:36:50,120] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 02:36:50,361] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [30]/[120]
[2022-12-11 02:36:50,362] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.0005
[2022-12-11 02:36:50,363] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:36:50,523] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.258651
[2022-12-11 02:37:29,016] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.975133
[2022-12-11 02:38:07,881] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.070796
[2022-12-11 02:38:46,276] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.329112
[2022-12-11 02:39:26,619] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.838974
[2022-12-11 02:40:06,451] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.109718
[2022-12-11 02:40:45,269] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.344778
[2022-12-11 02:41:24,682] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.005185
[2022-12-11 02:41:25,126] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:41:25,188] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439821
[2022-12-11 02:41:38,744] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.196458
[2022-12-11 02:41:48,948] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.178923
[2022-12-11 02:41:48,949] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.518382
[2022-12-11 02:41:48,950] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 02:41:49,192] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [31]/[120]
[2022-12-11 02:41:49,192] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.0005
[2022-12-11 02:41:49,193] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:41:49,363] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.100664
[2022-12-11 02:42:28,792] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.316537
[2022-12-11 02:43:08,599] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.178751
[2022-12-11 02:43:47,845] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.943753
[2022-12-11 02:44:26,752] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.119825
[2022-12-11 02:45:05,044] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.207433
[2022-12-11 02:45:43,884] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.077482
[2022-12-11 02:46:22,902] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.125081
[2022-12-11 02:46:23,362] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:46:23,424] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.174633
[2022-12-11 02:46:36,929] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.304495
[2022-12-11 02:46:47,081] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.156124
[2022-12-11 02:46:47,082] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525763
[2022-12-11 02:46:47,083] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 02:46:47,323] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [32]/[120]





[2022-12-11 02:46:47,324] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.00025
[2022-12-11 02:46:47,325] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:46:47,484] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.369716
[2022-12-11 02:47:26,497] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.850731
[2022-12-11 02:48:05,906] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.881116
[2022-12-11 02:48:44,696] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.515387
[2022-12-11 02:49:23,705] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.995029
[2022-12-11 02:50:02,379] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.227572
[2022-12-11 02:50:41,598] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.908654
[2022-12-11 02:51:20,413] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.076188
[2022-12-11 02:51:20,877] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:51:20,940] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.310749
[2022-12-11 02:51:34,520] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.399183
[2022-12-11 02:51:44,612] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.115527
[2022-12-11 02:51:44,613] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.498418
[2022-12-11 02:51:44,614] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 02:51:44,857] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [33]/[120]





[2022-12-11 02:51:44,858] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.00025
[2022-12-11 02:51:44,858] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:51:45,046] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.096483
[2022-12-11 02:52:23,605] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.062133
[2022-12-11 02:53:02,282] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.307291
[2022-12-11 02:53:41,393] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.795664
[2022-12-11 02:54:20,266] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.918048
[2022-12-11 02:54:59,345] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.017310
[2022-12-11 02:55:38,237] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.972098
[2022-12-11 02:56:16,477] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.058026
[2022-12-11 02:56:17,049] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 02:56:17,107] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.417027
[2022-12-11 02:56:30,641] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.237812
[2022-12-11 02:56:40,773] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.085605
[2022-12-11 02:56:40,774] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.532780
[2022-12-11 02:56:40,775] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 02:56:41,018] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [34]/[120]





[2022-12-11 02:56:41,019] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.00025
[2022-12-11 02:56:41,020] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 02:56:41,192] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.099561
[2022-12-11 02:57:21,144] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.341960
[2022-12-11 02:58:01,023] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.008870
[2022-12-11 02:58:40,435] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.842831
[2022-12-11 02:59:19,008] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.718769
[2022-12-11 02:59:57,644] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.198136
[2022-12-11 03:00:35,993] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.076077
[2022-12-11 03:01:17,876] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.897407
[2022-12-11 03:01:18,366] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:01:18,428] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.384375
[2022-12-11 03:01:32,174] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.272335
[2022-12-11 03:01:42,533] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.075272
[2022-12-11 03:01:42,534] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.515766
[2022-12-11 03:01:42,534] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 03:01:42,777] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [35]/[120]
[2022-12-11 03:01:42,778] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.00025
[2022-12-11 03:01:42,779] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:01:42,950] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.143128
[2022-12-11 03:02:21,480] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.704893
[2022-12-11 03:02:59,931] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.243430
[2022-12-11 03:03:38,682] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.950005
[2022-12-11 03:04:17,082] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.914022
[2022-12-11 03:04:55,743] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.958161
[2022-12-11 03:05:34,908] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.035799
[2022-12-11 03:06:13,984] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.788241
[2022-12-11 03:06:14,437] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:06:14,492] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.314013
[2022-12-11 03:06:27,920] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.386725
[2022-12-11 03:06:38,021] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.068823
[2022-12-11 03:06:38,021] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.500584
[2022-12-11 03:06:38,022] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 03:06:38,265] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [36]/[120]





[2022-12-11 03:06:38,266] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.00025
[2022-12-11 03:06:38,267] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:06:38,427] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.180187
[2022-12-11 03:07:17,169] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 2.045402
[2022-12-11 03:07:56,132] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.859085
[2022-12-11 03:08:34,872] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.724094
[2022-12-11 03:09:13,229] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.984858
[2022-12-11 03:09:52,045] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.020006
[2022-12-11 03:10:30,636] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.935948
[2022-12-11 03:11:09,857] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.083635
[2022-12-11 03:11:10,367] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:11:10,423] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.277663
[2022-12-11 03:11:23,841] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.438953
[2022-12-11 03:11:33,944] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.051339
[2022-12-11 03:11:33,945] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.536807
[2022-12-11 03:11:33,946] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 03:11:34,189] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [37]/[120]





[2022-12-11 03:11:34,190] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.00025
[2022-12-11 03:11:34,190] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:11:34,353] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.982799
[2022-12-11 03:12:13,045] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.040510
[2022-12-11 03:12:51,764] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.903453
[2022-12-11 03:13:30,638] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.119650
[2022-12-11 03:14:09,759] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.883465
[2022-12-11 03:14:47,890] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.224280
[2022-12-11 03:15:26,705] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.202092
[2022-12-11 03:16:05,177] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.134017
[2022-12-11 03:16:05,585] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:16:05,645] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.369335
[2022-12-11 03:16:19,053] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.280252
[2022-12-11 03:16:29,134] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.047311
[2022-12-11 03:16:29,134] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.517201
[2022-12-11 03:16:29,135] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 03:16:29,380] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [38]/[120]





[2022-12-11 03:16:29,381] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.00025
[2022-12-11 03:16:29,382] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:16:29,531] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.896794
[2022-12-11 03:17:07,484] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.996924
[2022-12-11 03:17:46,363] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.889168
[2022-12-11 03:18:25,239] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.885696
[2022-12-11 03:19:04,545] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.363772
[2022-12-11 03:19:42,609] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.877112
[2022-12-11 03:20:20,975] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.033786
[2022-12-11 03:20:59,994] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.999446
[2022-12-11 03:21:00,455] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:21:00,516] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.120630
[2022-12-11 03:21:13,916] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.323519
[2022-12-11 03:21:23,972] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.049247
[2022-12-11 03:21:23,973] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525811
[2022-12-11 03:21:23,973] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 03:21:24,205] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [39]/[120]





[2022-12-11 03:21:24,206] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.000125
[2022-12-11 03:21:24,207] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:21:24,412] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.297374
[2022-12-11 03:22:03,082] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.740029
[2022-12-11 03:22:41,672] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.064831
[2022-12-11 03:23:20,731] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.687260
[2022-12-11 03:23:59,382] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.076073
[2022-12-11 03:24:37,501] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.099866
[2022-12-11 03:25:15,978] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.016953
[2022-12-11 03:25:54,478] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.946207
[2022-12-11 03:25:54,941] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:25:55,006] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.275653
[2022-12-11 03:26:08,487] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.129648
[2022-12-11 03:26:18,565] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.013811
[2022-12-11 03:26:18,565] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.535335
[2022-12-11 03:26:18,566] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 03:26:18,810] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [40]/[120]





[2022-12-11 03:26:18,811] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.000125
[2022-12-11 03:26:18,812] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:26:18,973] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.227686
[2022-12-11 03:26:58,607] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.047478
[2022-12-11 03:27:37,004] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.927875
[2022-12-11 03:28:15,915] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.164436
[2022-12-11 03:28:55,338] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.742414
[2022-12-11 03:29:33,855] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.845487
[2022-12-11 03:30:12,432] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.864362
[2022-12-11 03:30:51,531] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.964728
[2022-12-11 03:30:52,066] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:30:52,124] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.264250
[2022-12-11 03:31:05,579] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.335360
[2022-12-11 03:31:15,654] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.021843
[2022-12-11 03:31:15,655] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.500643
[2022-12-11 03:31:15,656] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 03:31:15,900] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [41]/[120]
[2022-12-11 03:31:15,901] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.000125
[2022-12-11 03:31:15,901] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:31:16,077] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.858240
[2022-12-11 03:31:54,907] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.101874
[2022-12-11 03:32:33,210] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.004559
[2022-12-11 03:33:12,474] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.317442
[2022-12-11 03:33:51,432] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.056277
[2022-12-11 03:34:29,881] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.778717
[2022-12-11 03:35:09,078] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.792720
[2022-12-11 03:35:47,941] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.293350
[2022-12-11 03:35:48,390] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:35:48,448] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.293927
[2022-12-11 03:36:01,945] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.289925
[2022-12-11 03:36:12,039] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	1.006575
[2022-12-11 03:36:12,040] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.528631
[2022-12-11 03:36:12,041] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 03:36:12,285] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [42]/[120]
[2022-12-11 03:36:12,287] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.000125
[2022-12-11 03:36:12,287] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:36:12,433] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.975096
[2022-12-11 03:36:51,267] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.173214
[2022-12-11 03:37:29,663] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.948157
[2022-12-11 03:38:09,200] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.894767
[2022-12-11 03:38:48,160] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.854773
[2022-12-11 03:39:26,110] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.702102
[2022-12-11 03:40:06,406] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.950340
[2022-12-11 03:40:46,191] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.826528
[2022-12-11 03:40:46,661] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:40:46,723] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.291703
[2022-12-11 03:41:00,593] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.309206
[2022-12-11 03:41:10,907] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.995821
[2022-12-11 03:41:10,908] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.529376
[2022-12-11 03:41:10,908] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 03:41:11,148] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [43]/[120]
[2022-12-11 03:41:11,148] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.000125
[2022-12-11 03:41:11,149] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:41:11,324] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.868079
[2022-12-11 03:41:49,712] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.799334
[2022-12-11 03:42:28,738] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.717347
[2022-12-11 03:43:07,250] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.162714
[2022-12-11 03:43:45,982] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.913657
[2022-12-11 03:44:24,431] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.936051
[2022-12-11 03:45:02,685] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.564060
[2022-12-11 03:45:41,188] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.838271
[2022-12-11 03:45:41,684] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:45:41,737] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.233160
[2022-12-11 03:45:55,056] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.164391
[2022-12-11 03:46:05,070] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.996757
[2022-12-11 03:46:05,071] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.529556
[2022-12-11 03:46:05,072] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 03:46:05,318] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [44]/[120]
[2022-12-11 03:46:05,318] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	0.000125
[2022-12-11 03:46:05,319] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:46:05,508] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.136925
[2022-12-11 03:46:43,677] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.961035
[2022-12-11 03:47:22,263] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.709402
[2022-12-11 03:48:00,787] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.121606
[2022-12-11 03:48:40,049] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.831821
[2022-12-11 03:49:20,238] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.751270
[2022-12-11 03:49:59,947] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.847542
[2022-12-11 03:50:39,441] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.997400
[2022-12-11 03:50:39,876] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:50:39,943] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.279690
[2022-12-11 03:50:53,734] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.056236
[2022-12-11 03:51:04,075] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.990092
[2022-12-11 03:51:04,076] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.537932
[2022-12-11 03:51:04,077] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 03:51:04,322] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [45]/[120]
[2022-12-11 03:51:04,323] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.25e-05
[2022-12-11 03:51:04,323] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:51:04,479] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.842197
[2022-12-11 03:51:43,131] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.949850
[2022-12-11 03:52:23,297] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.910969
[2022-12-11 03:53:02,037] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.111767
[2022-12-11 03:53:40,204] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.903740
[2022-12-11 03:54:19,264] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.883542
[2022-12-11 03:54:57,630] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.675879
[2022-12-11 03:55:37,318] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.042771
[2022-12-11 03:55:37,843] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 03:55:37,911] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.451647
[2022-12-11 03:55:51,356] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.069598
[2022-12-11 03:56:01,469] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.978676
[2022-12-11 03:56:01,469] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.521113
[2022-12-11 03:56:01,470] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 03:56:01,713] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [46]/[120]





[2022-12-11 03:56:01,713] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.25e-05
[2022-12-11 03:56:01,714] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 03:56:01,919] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.678059
[2022-12-11 03:56:40,638] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.198862
[2022-12-11 03:57:19,471] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.273828
[2022-12-11 03:57:58,020] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.218890
[2022-12-11 03:58:36,963] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.785925
[2022-12-11 03:59:15,650] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.833355
[2022-12-11 03:59:53,961] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.870201
[2022-12-11 04:00:33,488] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.852031
[2022-12-11 04:00:33,954] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:00:34,019] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.474376
[2022-12-11 04:00:47,457] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.236243
[2022-12-11 04:00:57,553] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.978793
[2022-12-11 04:00:57,554] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525990
[2022-12-11 04:00:57,555] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 04:00:57,800] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [47]/[120]





[2022-12-11 04:00:57,801] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.25e-05
[2022-12-11 04:00:57,802] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:00:58,012] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.052101
[2022-12-11 04:01:37,137] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.493987
[2022-12-11 04:02:16,292] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.946001
[2022-12-11 04:02:54,836] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.821095
[2022-12-11 04:03:33,326] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.098842
[2022-12-11 04:04:12,495] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.082472
[2022-12-11 04:04:50,950] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.778829
[2022-12-11 04:05:29,647] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.216747
[2022-12-11 04:05:30,124] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:05:30,183] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.427610
[2022-12-11 04:05:43,615] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.186683
[2022-12-11 04:05:53,709] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.968712
[2022-12-11 04:05:53,710] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522015
[2022-12-11 04:05:53,710] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 04:05:53,953] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [48]/[120]
[2022-12-11 04:05:53,955] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.25e-05
[2022-12-11 04:05:53,956] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:05:54,135] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.181657
[2022-12-11 04:06:32,462] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.119275
[2022-12-11 04:07:10,833] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.874872
[2022-12-11 04:07:50,329] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.957705
[2022-12-11 04:08:29,517] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.356999
[2022-12-11 04:09:07,503] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.789361
[2022-12-11 04:09:46,506] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.920344
[2022-12-11 04:10:25,308] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.979276
[2022-12-11 04:10:25,746] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:10:25,804] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.416029
[2022-12-11 04:10:39,251] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.301586
[2022-12-11 04:10:49,317] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.972988
[2022-12-11 04:10:49,318] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.535908
[2022-12-11 04:10:49,318] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 04:10:49,561] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [49]/[120]





[2022-12-11 04:10:49,563] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.25e-05
[2022-12-11 04:10:49,564] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:10:49,729] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.042221
[2022-12-11 04:11:28,293] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.046308
[2022-12-11 04:12:07,253] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.044089
[2022-12-11 04:12:46,016] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.748993
[2022-12-11 04:13:24,516] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.881089
[2022-12-11 04:14:03,384] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.682114
[2022-12-11 04:14:41,868] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.668941
[2022-12-11 04:15:21,194] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.932944
[2022-12-11 04:15:21,623] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:15:21,682] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.458519
[2022-12-11 04:15:35,092] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.244105
[2022-12-11 04:15:45,168] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.969673
[2022-12-11 04:15:45,169] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.518487
[2022-12-11 04:15:45,170] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 04:15:45,412] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [50]/[120]
[2022-12-11 04:15:45,413] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.25e-05
[2022-12-11 04:15:45,414] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:15:45,613] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.872203
[2022-12-11 04:16:24,234] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.708712
[2022-12-11 04:17:03,645] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.923865
[2022-12-11 04:17:42,674] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.010152
[2022-12-11 04:18:21,460] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.630133
[2022-12-11 04:18:59,471] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.919735
[2022-12-11 04:19:38,775] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.940207
[2022-12-11 04:20:17,397] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.903554
[2022-12-11 04:20:17,860] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:20:17,920] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.400249
[2022-12-11 04:20:31,369] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.255212
[2022-12-11 04:20:41,448] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.957386
[2022-12-11 04:20:41,449] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.526622
[2022-12-11 04:20:41,449] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 04:20:41,694] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [51]/[120]





[2022-12-11 04:20:41,695] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.125e-05
[2022-12-11 04:20:41,696] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:20:41,867] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.230889
[2022-12-11 04:21:22,065] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.824500
[2022-12-11 04:22:01,713] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.886765
[2022-12-11 04:22:40,223] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.176016
[2022-12-11 04:23:18,695] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.864343
[2022-12-11 04:23:58,228] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.779591
[2022-12-11 04:24:36,737] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.959279
[2022-12-11 04:25:15,387] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.961278
[2022-12-11 04:25:16,009] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:25:16,064] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.393767
[2022-12-11 04:25:29,363] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.272383
[2022-12-11 04:25:39,350] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.962566
[2022-12-11 04:25:39,351] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.516734
[2022-12-11 04:25:39,351] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 04:25:39,594] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [52]/[120]





[2022-12-11 04:25:39,595] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.125e-05
[2022-12-11 04:25:39,596] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:25:39,750] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.795130
[2022-12-11 04:26:17,543] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.010757
[2022-12-11 04:26:56,173] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.830323
[2022-12-11 04:27:35,321] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.210968
[2022-12-11 04:28:15,405] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.984460
[2022-12-11 04:28:55,210] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.137770
[2022-12-11 04:29:35,756] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.003444
[2022-12-11 04:30:13,137] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.640999
[2022-12-11 04:30:13,594] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:30:13,658] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.471686
[2022-12-11 04:30:28,068] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.276357
[2022-12-11 04:30:37,808] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.948450
[2022-12-11 04:30:37,809] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.534886
[2022-12-11 04:30:37,810] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 04:30:38,058] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [53]/[120]
[2022-12-11 04:30:38,059] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.125e-05
[2022-12-11 04:30:38,060] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:30:38,211] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.772332
[2022-12-11 04:31:15,539] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.852374
[2022-12-11 04:31:56,331] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.774159
[2022-12-11 04:32:35,159] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.880232
[2022-12-11 04:33:14,173] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.895345
[2022-12-11 04:33:53,377] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.030774
[2022-12-11 04:34:31,694] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.330709
[2022-12-11 04:35:10,381] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.234212
[2022-12-11 04:35:10,783] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:35:10,842] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.448192
[2022-12-11 04:35:24,314] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.261755
[2022-12-11 04:35:34,458] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.950321
[2022-12-11 04:35:34,459] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.524213
[2022-12-11 04:35:34,460] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 04:35:34,704] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [54]/[120]





[2022-12-11 04:35:34,705] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.125e-05
[2022-12-11 04:35:34,706] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:35:34,866] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.843007
[2022-12-11 04:36:13,076] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.724249
[2022-12-11 04:36:51,700] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.725743
[2022-12-11 04:37:30,917] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.797562
[2022-12-11 04:38:09,848] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.004387
[2022-12-11 04:38:48,494] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.942809
[2022-12-11 04:39:27,508] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.734047
[2022-12-11 04:40:07,226] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.173383
[2022-12-11 04:40:07,669] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:40:07,734] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.478180
[2022-12-11 04:40:21,240] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.270009
[2022-12-11 04:40:31,366] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.960321
[2022-12-11 04:40:31,367] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.531331
[2022-12-11 04:40:31,368] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 04:40:31,614] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [55]/[120]





[2022-12-11 04:40:31,616] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.125e-05
[2022-12-11 04:40:31,616] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:40:31,832] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.711246
[2022-12-11 04:41:10,411] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.863672
[2022-12-11 04:41:49,334] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.952319
[2022-12-11 04:42:28,576] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.918251
[2022-12-11 04:43:07,393] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.046511
[2022-12-11 04:43:47,225] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.817274
[2022-12-11 04:44:25,743] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.725037
[2022-12-11 04:45:04,205] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.860264
[2022-12-11 04:45:04,680] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:45:04,738] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.410870
[2022-12-11 04:45:18,228] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.270209
[2022-12-11 04:45:28,425] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.944530
[2022-12-11 04:45:28,426] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.543815
[2022-12-11 04:45:28,426] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 04:45:28,673] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [56]/[120]
[2022-12-11 04:45:28,674] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.125e-05
[2022-12-11 04:45:28,675] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:45:28,853] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.916363
[2022-12-11 04:46:07,738] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.434810
[2022-12-11 04:46:47,221] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.800302
[2022-12-11 04:47:25,574] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.943861
[2022-12-11 04:48:04,631] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.680738
[2022-12-11 04:48:43,289] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.732913
[2022-12-11 04:49:22,301] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.858916
[2022-12-11 04:50:01,249] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.800958
[2022-12-11 04:50:01,765] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:50:01,826] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.392174
[2022-12-11 04:50:15,303] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.256073
[2022-12-11 04:50:25,430] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.949465
[2022-12-11 04:50:25,431] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.531625
[2022-12-11 04:50:25,431] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 04:50:25,677] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [57]/[120]





[2022-12-11 04:50:25,678] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.5625e-05
[2022-12-11 04:50:25,679] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:50:25,867] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.006517
[2022-12-11 04:51:04,780] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.869910
[2022-12-11 04:51:43,415] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.966683
[2022-12-11 04:52:23,219] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.613128
[2022-12-11 04:53:02,569] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.965787
[2022-12-11 04:53:41,174] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.040303
[2022-12-11 04:54:20,224] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.870873
[2022-12-11 04:54:58,819] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.874755
[2022-12-11 04:54:59,323] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:54:59,381] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.284539
[2022-12-11 04:55:12,876] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.241751
[2022-12-11 04:55:23,009] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.939040
[2022-12-11 04:55:23,009] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.537865
[2022-12-11 04:55:23,010] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 04:55:23,246] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [58]/[120]
[2022-12-11 04:55:23,247] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.5625e-05
[2022-12-11 04:55:23,247] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 04:55:23,400] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.733722
[2022-12-11 04:56:01,992] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.270147
[2022-12-11 04:56:40,833] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.992769
[2022-12-11 04:57:19,468] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.217449
[2022-12-11 04:57:58,579] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.076075
[2022-12-11 04:58:38,160] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.578058
[2022-12-11 04:59:17,050] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.122044
[2022-12-11 04:59:55,578] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.879639
[2022-12-11 04:59:56,065] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 04:59:56,123] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.388469
[2022-12-11 05:00:09,585] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.257523
[2022-12-11 05:00:19,714] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.946725
[2022-12-11 05:00:19,714] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.534323
[2022-12-11 05:00:19,715] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 05:00:19,960] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [59]/[120]





[2022-12-11 05:00:19,961] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.5625e-05
[2022-12-11 05:00:19,962] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:00:20,144] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.851265
[2022-12-11 05:00:59,108] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.191198
[2022-12-11 05:01:38,649] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.081640
[2022-12-11 05:02:17,271] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.111059
[2022-12-11 05:02:55,742] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.009973
[2022-12-11 05:03:34,275] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.902653
[2022-12-11 05:04:14,114] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.823061
[2022-12-11 05:04:54,173] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.971840
[2022-12-11 05:04:54,622] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:04:54,691] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.436837
[2022-12-11 05:05:08,571] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.242118
[2022-12-11 05:05:19,025] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.952184
[2022-12-11 05:05:19,026] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.526497
[2022-12-11 05:05:19,027] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 05:05:19,270] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [60]/[120]
[2022-12-11 05:05:19,271] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.5625e-05
[2022-12-11 05:05:19,272] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:05:19,442] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.049834
[2022-12-11 05:05:59,487] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.158403
[2022-12-11 05:06:39,383] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.938966
[2022-12-11 05:07:19,752] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.750617
[2022-12-11 05:07:58,821] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.433658
[2022-12-11 05:08:38,976] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.187424
[2022-12-11 05:09:18,949] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.914187
[2022-12-11 05:09:58,632] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.795092
[2022-12-11 05:09:59,082] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:09:59,138] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.440709
[2022-12-11 05:10:13,013] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.192499
[2022-12-11 05:10:23,453] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.950909
[2022-12-11 05:10:23,453] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.517260
[2022-12-11 05:10:23,454] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 05:10:23,698] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [61]/[120]





[2022-12-11 05:10:23,699] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.5625e-05
[2022-12-11 05:10:23,700] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:10:23,861] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.081206
[2022-12-11 05:11:01,166] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.949330
[2022-12-11 05:11:41,595] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.142269
[2022-12-11 05:12:20,880] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.837527
[2022-12-11 05:12:59,391] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.733128
[2022-12-11 05:13:40,416] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.349612
[2022-12-11 05:14:20,300] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.668429
[2022-12-11 05:15:00,261] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.744097
[2022-12-11 05:15:00,717] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:15:00,788] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.402333
[2022-12-11 05:15:14,728] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.216602
[2022-12-11 05:15:25,126] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.946816
[2022-12-11 05:15:25,127] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.508704
[2022-12-11 05:15:25,128] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 05:15:25,362] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [62]/[120]
[2022-12-11 05:15:25,363] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.5625e-05
[2022-12-11 05:15:25,364] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:15:25,542] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.847806
[2022-12-11 05:16:04,323] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.911029
[2022-12-11 05:16:43,781] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.369450
[2022-12-11 05:17:23,244] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.853309
[2022-12-11 05:18:02,402] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.979447
[2022-12-11 05:18:41,623] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.947875
[2022-12-11 05:19:21,242] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.232107
[2022-12-11 05:19:59,810] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.791378
[2022-12-11 05:20:00,285] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:20:00,344] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.448218
[2022-12-11 05:20:13,991] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.223653
[2022-12-11 05:20:24,199] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.939438
[2022-12-11 05:20:24,199] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.538081
[2022-12-11 05:20:24,200] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 05:20:24,439] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [63]/[120]





[2022-12-11 05:20:24,440] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	7.8125e-06
[2022-12-11 05:20:24,441] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:20:24,595] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.891736
[2022-12-11 05:21:03,293] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.946476
[2022-12-11 05:21:42,129] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.885469
[2022-12-11 05:22:20,975] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.950577
[2022-12-11 05:23:00,338] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.999482
[2022-12-11 05:23:38,977] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.925841
[2022-12-11 05:24:19,035] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.362081
[2022-12-11 05:24:57,652] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.080705
[2022-12-11 05:24:58,108] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:24:58,165] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.445864
[2022-12-11 05:25:11,746] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.216090
[2022-12-11 05:25:21,923] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.939556
[2022-12-11 05:25:21,923] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.527776
[2022-12-11 05:25:21,924] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 05:25:22,161] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [64]/[120]





[2022-12-11 05:25:22,162] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	7.8125e-06
[2022-12-11 05:25:22,162] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:25:22,325] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.747675
[2022-12-11 05:26:01,242] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.863923
[2022-12-11 05:26:40,546] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.771811
[2022-12-11 05:27:19,556] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.978644
[2022-12-11 05:27:58,652] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.909316
[2022-12-11 05:28:37,934] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.804234
[2022-12-11 05:29:17,377] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.872031
[2022-12-11 05:29:56,663] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.669904
[2022-12-11 05:29:57,083] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:29:57,141] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.457565
[2022-12-11 05:30:10,712] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.242034
[2022-12-11 05:30:20,917] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.935372
[2022-12-11 05:30:20,918] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.537099
[2022-12-11 05:30:20,919] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 05:30:21,171] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [65]/[120]





[2022-12-11 05:30:21,172] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	7.8125e-06
[2022-12-11 05:30:21,173] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:30:21,378] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.062375
[2022-12-11 05:30:59,560] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.256999
[2022-12-11 05:31:38,629] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.893972
[2022-12-11 05:32:17,470] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.813055
[2022-12-11 05:32:57,009] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.899312
[2022-12-11 05:33:35,651] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.804741
[2022-12-11 05:34:15,428] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.082785
[2022-12-11 05:34:54,058] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.752613
[2022-12-11 05:34:54,502] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:34:54,569] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.453111
[2022-12-11 05:35:08,178] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.252476
[2022-12-11 05:35:18,393] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.937969
[2022-12-11 05:35:18,393] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.531473
[2022-12-11 05:35:18,394] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 05:35:18,640] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [66]/[120]





[2022-12-11 05:35:18,641] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	7.8125e-06
[2022-12-11 05:35:18,642] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:35:18,858] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.922512
[2022-12-11 05:35:57,649] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.838025
[2022-12-11 05:36:36,620] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.815069
[2022-12-11 05:37:16,402] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.976057
[2022-12-11 05:37:55,258] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.892845
[2022-12-11 05:38:34,567] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.803597
[2022-12-11 05:39:13,644] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.001104
[2022-12-11 05:39:52,527] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.784218
[2022-12-11 05:39:52,963] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:39:53,025] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.443395
[2022-12-11 05:40:06,622] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.268095
[2022-12-11 05:40:16,816] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.936234
[2022-12-11 05:40:16,817] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.524840
[2022-12-11 05:40:16,817] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 05:40:17,066] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [67]/[120]
[2022-12-11 05:40:17,067] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	7.8125e-06
[2022-12-11 05:40:17,068] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:40:17,251] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.736404
[2022-12-11 05:40:55,911] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.824191
[2022-12-11 05:41:34,586] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.061827
[2022-12-11 05:42:12,401] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.007848
[2022-12-11 05:42:50,202] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.732209
[2022-12-11 05:43:30,531] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.777177
[2022-12-11 05:44:09,022] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.733173
[2022-12-11 05:44:47,642] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.844654
[2022-12-11 05:44:48,094] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:44:48,152] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.314513
[2022-12-11 05:45:01,619] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.239775
[2022-12-11 05:45:11,684] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.941008
[2022-12-11 05:45:11,684] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.520401
[2022-12-11 05:45:11,685] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 05:45:11,931] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [68]/[120]
[2022-12-11 05:45:11,932] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	7.8125e-06
[2022-12-11 05:45:11,932] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:45:12,127] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.951922
[2022-12-11 05:45:50,344] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.793704
[2022-12-11 05:46:28,855] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.187945
[2022-12-11 05:47:07,481] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.075541
[2022-12-11 05:47:46,104] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.940386
[2022-12-11 05:48:24,426] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.027079
[2022-12-11 05:49:03,127] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.967812
[2022-12-11 05:49:41,884] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.058498
[2022-12-11 05:49:42,361] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:49:42,422] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.428123
[2022-12-11 05:49:55,889] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.255129
[2022-12-11 05:50:06,042] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.940567
[2022-12-11 05:50:06,043] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.530081
[2022-12-11 05:50:06,043] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 05:50:06,290] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [69]/[120]





[2022-12-11 05:50:06,291] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.90625e-06
[2022-12-11 05:50:06,292] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:50:06,485] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.763652
[2022-12-11 05:50:45,397] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.711106
[2022-12-11 05:51:24,322] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.077199
[2022-12-11 05:52:03,056] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.006833
[2022-12-11 05:52:43,670] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.734232
[2022-12-11 05:53:23,604] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.788540
[2022-12-11 05:54:04,797] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.243889
[2022-12-11 05:54:43,418] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.737739
[2022-12-11 05:54:43,818] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:54:43,875] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.441804
[2022-12-11 05:54:56,747] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.262597
[2022-12-11 05:55:07,333] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.935850
[2022-12-11 05:55:07,334] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.524785
[2022-12-11 05:55:07,334] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 05:55:07,581] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [70]/[120]





[2022-12-11 05:55:07,582] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.90625e-06
[2022-12-11 05:55:07,582] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 05:55:07,741] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.966651
[2022-12-11 05:55:45,897] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.132295
[2022-12-11 05:56:24,808] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.745857
[2022-12-11 05:57:05,807] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.721382
[2022-12-11 05:57:44,143] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.275782
[2022-12-11 05:58:22,733] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.849229
[2022-12-11 05:59:00,760] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.980880
[2022-12-11 05:59:39,027] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.835951
[2022-12-11 05:59:39,498] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 05:59:39,570] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.416660
[2022-12-11 05:59:53,114] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.247947
[2022-12-11 06:00:04,555] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.940942
[2022-12-11 06:00:04,555] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.531654
[2022-12-11 06:00:04,556] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:00:04,802] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [71]/[120]





[2022-12-11 06:00:04,803] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.90625e-06
[2022-12-11 06:00:04,803] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:00:04,976] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.994173
[2022-12-11 06:00:44,969] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.639739
[2022-12-11 06:01:24,270] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.888320
[2022-12-11 06:02:03,874] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.960914
[2022-12-11 06:02:44,474] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.808627
[2022-12-11 06:03:24,902] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.014937
[2022-12-11 06:04:05,126] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.838578
[2022-12-11 06:04:45,208] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.863066
[2022-12-11 06:04:45,708] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:04:45,766] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.457966
[2022-12-11 06:04:59,698] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.241672
[2022-12-11 06:05:10,178] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.938788
[2022-12-11 06:05:10,179] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.526964
[2022-12-11 06:05:10,179] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:05:10,424] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [72]/[120]





[2022-12-11 06:05:10,425] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.90625e-06
[2022-12-11 06:05:10,426] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:05:10,609] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.071020
[2022-12-11 06:05:51,372] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.842682
[2022-12-11 06:06:30,629] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.920068
[2022-12-11 06:07:10,147] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.969459
[2022-12-11 06:07:49,359] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.850423
[2022-12-11 06:08:28,599] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.843222
[2022-12-11 06:09:07,863] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.940441
[2022-12-11 06:09:47,438] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.825488
[2022-12-11 06:09:48,004] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:09:48,069] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.437619
[2022-12-11 06:10:01,723] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.241201
[2022-12-11 06:10:11,983] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.938442
[2022-12-11 06:10:11,984] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.531277
[2022-12-11 06:10:11,985] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:10:12,241] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [73]/[120]





[2022-12-11 06:10:12,242] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.90625e-06
[2022-12-11 06:10:12,243] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:10:12,458] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.101899
[2022-12-11 06:10:51,739] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.309275
[2022-12-11 06:11:31,153] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.104005
[2022-12-11 06:12:09,337] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.031189
[2022-12-11 06:12:50,051] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.799180
[2022-12-11 06:13:29,094] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.263837
[2022-12-11 06:14:07,863] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.087473
[2022-12-11 06:14:46,781] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.823345
[2022-12-11 06:14:47,280] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:14:47,338] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.448367
[2022-12-11 06:15:00,856] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.223216
[2022-12-11 06:15:11,040] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.939656
[2022-12-11 06:15:11,041] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525623
[2022-12-11 06:15:11,041] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 06:15:11,286] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [74]/[120]
[2022-12-11 06:15:11,287] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.90625e-06
[2022-12-11 06:15:11,288] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:15:11,493] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.129614
[2022-12-11 06:15:49,997] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.434236
[2022-12-11 06:16:29,328] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.767934
[2022-12-11 06:17:07,362] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.373199
[2022-12-11 06:17:47,632] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.019960
[2022-12-11 06:18:25,559] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.990610
[2022-12-11 06:19:04,208] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.792625
[2022-12-11 06:19:43,413] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.752712
[2022-12-11 06:19:43,905] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:19:43,968] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.444364
[2022-12-11 06:19:57,333] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.225556
[2022-12-11 06:20:07,407] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.940717
[2022-12-11 06:20:07,408] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.529118
[2022-12-11 06:20:07,409] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:20:07,675] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [75]/[120]





[2022-12-11 06:20:07,676] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.953125e-06
[2022-12-11 06:20:07,677] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:20:07,874] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.116277
[2022-12-11 06:20:46,390] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.925593
[2022-12-11 06:21:24,774] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.032328
[2022-12-11 06:22:02,919] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.778432
[2022-12-11 06:22:41,543] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.176478
[2022-12-11 06:23:19,756] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.969244
[2022-12-11 06:23:58,704] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.786601
[2022-12-11 06:24:37,626] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.666978
[2022-12-11 06:24:38,082] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:24:38,137] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.443890
[2022-12-11 06:24:51,478] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.238770
[2022-12-11 06:25:01,521] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.941044
[2022-12-11 06:25:01,521] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.527542
[2022-12-11 06:25:01,522] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:25:01,771] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [76]/[120]





[2022-12-11 06:25:01,772] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.953125e-06
[2022-12-11 06:25:01,773] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:25:01,941] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.018842
[2022-12-11 06:25:40,701] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.609746
[2022-12-11 06:26:19,133] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.888712
[2022-12-11 06:26:57,287] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.070208
[2022-12-11 06:27:35,343] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.979083
[2022-12-11 06:28:14,026] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.885212
[2022-12-11 06:28:52,838] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.746820
[2022-12-11 06:29:31,126] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.095458
[2022-12-11 06:29:31,599] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:29:31,659] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.440262
[2022-12-11 06:29:45,000] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.234480
[2022-12-11 06:29:55,022] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.931497
[2022-12-11 06:29:55,023] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.528955
[2022-12-11 06:29:55,024] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 06:29:55,262] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [77]/[120]
[2022-12-11 06:29:55,263] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.953125e-06
[2022-12-11 06:29:55,264] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:29:55,516] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.308497
[2022-12-11 06:30:33,671] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.858528
[2022-12-11 06:31:12,474] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.962105
[2022-12-11 06:31:51,131] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.958314
[2022-12-11 06:32:30,450] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.887307
[2022-12-11 06:33:09,127] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.748267
[2022-12-11 06:33:47,400] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.781252
[2022-12-11 06:34:25,244] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.969735
[2022-12-11 06:34:25,693] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:34:25,755] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.440074
[2022-12-11 06:34:39,129] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.241660
[2022-12-11 06:34:49,177] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.946605
[2022-12-11 06:34:49,178] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523544
[2022-12-11 06:34:49,179] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 06:34:49,426] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [78]/[120]
[2022-12-11 06:34:49,426] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.953125e-06
[2022-12-11 06:34:49,427] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:34:49,583] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.911625
[2022-12-11 06:35:27,990] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.123406
[2022-12-11 06:36:06,443] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.132333
[2022-12-11 06:36:45,419] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.814968
[2022-12-11 06:37:24,014] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.843903
[2022-12-11 06:38:02,522] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.824033
[2022-12-11 06:38:41,541] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.442702
[2022-12-11 06:39:19,796] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.970949
[2022-12-11 06:39:20,220] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:39:20,279] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.444881
[2022-12-11 06:39:33,684] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235138
[2022-12-11 06:39:43,748] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.940701
[2022-12-11 06:39:43,749] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.524686
[2022-12-11 06:39:43,750] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:39:43,996] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [79]/[120]





[2022-12-11 06:39:43,997] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.953125e-06
[2022-12-11 06:39:43,998] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:39:44,170] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.823393
[2022-12-11 06:40:22,421] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.825816
[2022-12-11 06:41:01,633] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.842419
[2022-12-11 06:41:40,260] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.128782
[2022-12-11 06:42:18,859] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.923142
[2022-12-11 06:42:56,870] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.917974
[2022-12-11 06:43:36,040] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.954153
[2022-12-11 06:44:14,674] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.191963
[2022-12-11 06:44:15,109] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:44:15,171] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.442649
[2022-12-11 06:44:28,568] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.233402
[2022-12-11 06:44:38,645] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.938181
[2022-12-11 06:44:38,646] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.524549
[2022-12-11 06:44:38,647] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:44:38,903] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [80]/[120]





[2022-12-11 06:44:38,904] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.953125e-06
[2022-12-11 06:44:38,904] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:44:39,147] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.363047
[2022-12-11 06:45:18,082] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.203738
[2022-12-11 06:45:56,833] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.968582
[2022-12-11 06:46:35,683] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.080473
[2022-12-11 06:47:14,367] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.410611
[2022-12-11 06:47:52,542] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.214400
[2022-12-11 06:48:31,045] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.962632
[2022-12-11 06:49:09,236] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.850611
[2022-12-11 06:49:09,689] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:49:09,747] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.455076
[2022-12-11 06:49:23,157] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.228918
[2022-12-11 06:49:33,174] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.938586
[2022-12-11 06:49:33,174] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.528178
[2022-12-11 06:49:33,175] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:49:33,413] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [81]/[120]





[2022-12-11 06:49:33,415] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	9.765625e-07
[2022-12-11 06:49:33,415] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:49:33,626] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.738566
[2022-12-11 06:50:12,072] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.275102
[2022-12-11 06:50:50,552] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.902250
[2022-12-11 06:51:28,808] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.716484
[2022-12-11 06:52:07,611] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.758864
[2022-12-11 06:52:45,563] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.829438
[2022-12-11 06:53:24,598] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.892343
[2022-12-11 06:54:02,768] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.761023
[2022-12-11 06:54:03,246] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:54:03,307] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.451886
[2022-12-11 06:54:16,686] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.234144
[2022-12-11 06:54:26,720] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.939685
[2022-12-11 06:54:26,721] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525022
[2022-12-11 06:54:26,721] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:54:26,968] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [82]/[120]





[2022-12-11 06:54:26,969] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	9.765625e-07
[2022-12-11 06:54:26,969] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:54:27,144] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.965740
[2022-12-11 06:55:05,402] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.024403
[2022-12-11 06:55:43,388] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.847717
[2022-12-11 06:56:21,240] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.133247
[2022-12-11 06:57:00,244] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.725587
[2022-12-11 06:57:39,075] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.866627
[2022-12-11 06:58:16,678] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.791268
[2022-12-11 06:58:55,596] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.937382
[2022-12-11 06:58:56,112] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 06:58:56,174] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.419292
[2022-12-11 06:59:09,550] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.237127
[2022-12-11 06:59:19,634] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.940267
[2022-12-11 06:59:19,634] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525953
[2022-12-11 06:59:19,635] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 06:59:19,880] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [83]/[120]





[2022-12-11 06:59:19,881] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	9.765625e-07
[2022-12-11 06:59:19,882] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 06:59:20,080] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.775219
[2022-12-11 06:59:58,882] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.813654
[2022-12-11 07:00:37,022] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.951582
[2022-12-11 07:01:15,492] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.717111
[2022-12-11 07:01:54,240] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.391526
[2022-12-11 07:02:34,677] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.762821
[2022-12-11 07:03:15,163] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.220358
[2022-12-11 07:03:54,302] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.929461
[2022-12-11 07:03:54,823] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:03:54,880] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.438176
[2022-12-11 07:04:08,508] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235576
[2022-12-11 07:04:18,734] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.930677
[2022-12-11 07:04:18,734] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.526813
[2022-12-11 07:04:18,735] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 07:04:18,982] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [84]/[120]





[2022-12-11 07:04:18,983] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	9.765625e-07
[2022-12-11 07:04:18,984] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:04:19,171] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.730476
[2022-12-11 07:04:58,265] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.829839
[2022-12-11 07:05:37,472] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.847671
[2022-12-11 07:06:16,741] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.863831
[2022-12-11 07:06:55,990] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.293155
[2022-12-11 07:07:35,515] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.156944
[2022-12-11 07:08:14,845] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.890796
[2022-12-11 07:08:54,416] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.915127
[2022-12-11 07:08:54,858] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:08:54,913] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.428208
[2022-12-11 07:09:08,581] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.243169
[2022-12-11 07:09:18,803] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.934612
[2022-12-11 07:09:18,803] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.524172
[2022-12-11 07:09:18,804] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 07:09:19,050] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [85]/[120]





[2022-12-11 07:09:19,052] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	9.765625e-07
[2022-12-11 07:09:19,053] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:09:19,238] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.750527
[2022-12-11 07:09:56,692] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.905246
[2022-12-11 07:10:36,835] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.753271
[2022-12-11 07:11:15,918] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.748369
[2022-12-11 07:11:55,637] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.909185
[2022-12-11 07:12:35,981] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.771094
[2022-12-11 07:13:13,772] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.160440
[2022-12-11 07:13:53,172] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.682869
[2022-12-11 07:13:53,642] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:13:53,706] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.445947
[2022-12-11 07:14:06,891] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.236813
[2022-12-11 07:14:16,744] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.935239
[2022-12-11 07:14:16,745] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.524389
[2022-12-11 07:14:16,745] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 07:14:16,990] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [86]/[120]





[2022-12-11 07:14:16,991] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	9.765625e-07
[2022-12-11 07:14:16,992] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:14:17,135] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.705227
[2022-12-11 07:14:54,970] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.761535
[2022-12-11 07:15:35,717] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.804513
[2022-12-11 07:16:15,165] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.208351
[2022-12-11 07:16:54,756] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.568632
[2022-12-11 07:17:33,766] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.786696
[2022-12-11 07:18:12,732] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.632773
[2022-12-11 07:18:51,825] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.734890
[2022-12-11 07:18:52,274] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:18:52,337] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.444649
[2022-12-11 07:19:05,470] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.240889
[2022-12-11 07:19:15,610] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.930828
[2022-12-11 07:19:15,611] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.524542
[2022-12-11 07:19:15,611] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 07:19:15,863] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [87]/[120]





[2022-12-11 07:19:15,864] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	4.8828125e-07
[2022-12-11 07:19:15,865] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:19:16,036] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.806421
[2022-12-11 07:19:54,047] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.099859
[2022-12-11 07:20:34,528] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.035693
[2022-12-11 07:21:13,792] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.843210
[2022-12-11 07:21:53,196] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.909094
[2022-12-11 07:22:32,684] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.975961
[2022-12-11 07:23:11,688] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.965095
[2022-12-11 07:23:50,818] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.991363
[2022-12-11 07:23:51,262] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:23:51,327] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.444505
[2022-12-11 07:24:04,921] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.237648
[2022-12-11 07:24:15,127] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.934472
[2022-12-11 07:24:15,127] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525032
[2022-12-11 07:24:15,129] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 07:24:15,373] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [88]/[120]
[2022-12-11 07:24:15,374] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	4.8828125e-07
[2022-12-11 07:24:15,375] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:24:15,550] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.868516
[2022-12-11 07:24:53,253] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.812067
[2022-12-11 07:25:32,751] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.803636
[2022-12-11 07:26:11,519] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.963488
[2022-12-11 07:26:50,697] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.783938
[2022-12-11 07:27:29,466] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.756275
[2022-12-11 07:28:08,636] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.903717
[2022-12-11 07:28:47,249] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.842847
[2022-12-11 07:28:47,694] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:28:47,756] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.444648
[2022-12-11 07:29:01,166] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235666
[2022-12-11 07:29:11,255] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.933944
[2022-12-11 07:29:11,256] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.524784
[2022-12-11 07:29:11,256] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 07:29:11,504] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [89]/[120]





[2022-12-11 07:29:11,505] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	4.8828125e-07
[2022-12-11 07:29:11,506] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:29:11,674] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.916233
[2022-12-11 07:29:50,589] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.031551
[2022-12-11 07:30:30,047] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.840287
[2022-12-11 07:31:09,293] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.898792
[2022-12-11 07:31:48,921] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.046113
[2022-12-11 07:32:27,756] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.907538
[2022-12-11 07:33:06,678] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.580100
[2022-12-11 07:33:46,987] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.713147
[2022-12-11 07:33:47,427] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:33:47,482] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.445133
[2022-12-11 07:34:01,226] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235321
[2022-12-11 07:34:11,064] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.934375
[2022-12-11 07:34:11,064] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525246
[2022-12-11 07:34:11,065] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 07:34:11,316] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [90]/[120]
[2022-12-11 07:34:11,317] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	4.8828125e-07
[2022-12-11 07:34:11,318] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:34:11,477] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.046545
[2022-12-11 07:34:51,439] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.881568
[2022-12-11 07:35:29,309] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.154557
[2022-12-11 07:36:08,361] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.799118
[2022-12-11 07:36:47,206] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.969828
[2022-12-11 07:37:25,773] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.268217
[2022-12-11 07:38:05,299] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.840205
[2022-12-11 07:38:44,868] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.208524
[2022-12-11 07:38:45,347] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:38:45,413] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.445344
[2022-12-11 07:38:59,128] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235026
[2022-12-11 07:39:09,448] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.936455
[2022-12-11 07:39:09,449] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.526077
[2022-12-11 07:39:09,449] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 07:39:09,697] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [91]/[120]
[2022-12-11 07:39:09,698] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	4.8828125e-07
[2022-12-11 07:39:09,699] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:39:09,882] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.960458
[2022-12-11 07:39:47,410] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.642601
[2022-12-11 07:40:27,886] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.854892
[2022-12-11 07:41:06,225] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.777600
[2022-12-11 07:41:46,037] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.086214
[2022-12-11 07:42:25,512] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.828295
[2022-12-11 07:43:04,767] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.803514
[2022-12-11 07:43:44,148] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.756582
[2022-12-11 07:43:44,588] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:43:44,652] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.445419
[2022-12-11 07:43:58,393] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.239009
[2022-12-11 07:44:08,723] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.938124
[2022-12-11 07:44:08,724] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.527091
[2022-12-11 07:44:08,725] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 07:44:08,970] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [92]/[120]





[2022-12-11 07:44:08,970] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	4.8828125e-07
[2022-12-11 07:44:08,971] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:44:09,143] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.852783
[2022-12-11 07:44:47,449] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.972097
[2022-12-11 07:45:25,610] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.780938
[2022-12-11 07:46:04,620] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.038692
[2022-12-11 07:46:42,802] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.097918
[2022-12-11 07:47:23,259] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.979565
[2022-12-11 07:48:01,875] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.202375
[2022-12-11 07:48:40,818] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.867786
[2022-12-11 07:48:41,286] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:48:41,345] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.445367
[2022-12-11 07:48:54,809] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.236577
[2022-12-11 07:49:04,915] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.939288
[2022-12-11 07:49:04,916] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525768
[2022-12-11 07:49:04,916] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 07:49:05,165] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [93]/[120]
[2022-12-11 07:49:05,166] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	2.44140625e-07
[2022-12-11 07:49:05,167] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:49:05,346] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.028005
[2022-12-11 07:49:44,568] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.916301
[2022-12-11 07:50:22,885] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.830676
[2022-12-11 07:51:01,687] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.755328
[2022-12-11 07:51:41,281] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.171035
[2022-12-11 07:52:20,193] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.939705
[2022-12-11 07:52:59,076] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.878434
[2022-12-11 07:53:37,660] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.249324
[2022-12-11 07:53:38,097] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:53:38,159] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.445333
[2022-12-11 07:53:51,621] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.237284
[2022-12-11 07:54:01,716] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.935057
[2022-12-11 07:54:01,717] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525619
[2022-12-11 07:54:01,717] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 07:54:01,964] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [94]/[120]
[2022-12-11 07:54:01,965] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	2.44140625e-07
[2022-12-11 07:54:01,966] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:54:02,145] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.773405
[2022-12-11 07:54:41,548] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.613973
[2022-12-11 07:55:19,765] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.773920
[2022-12-11 07:55:59,007] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.767674
[2022-12-11 07:56:38,092] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.843309
[2022-12-11 07:57:16,907] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.940689
[2022-12-11 07:57:55,941] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.796067
[2022-12-11 07:58:34,846] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.958716
[2022-12-11 07:58:35,311] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 07:58:35,366] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.445167
[2022-12-11 07:58:48,867] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.237650
[2022-12-11 07:58:58,952] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.932900
[2022-12-11 07:58:58,953] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525092
[2022-12-11 07:58:58,953] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 07:58:59,201] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [95]/[120]





[2022-12-11 07:58:59,203] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	2.44140625e-07
[2022-12-11 07:58:59,204] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 07:58:59,378] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.875237
[2022-12-11 07:59:38,005] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.860433
[2022-12-11 08:00:16,951] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.224658
[2022-12-11 08:00:56,527] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.051791
[2022-12-11 08:01:35,373] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.984210
[2022-12-11 08:02:14,468] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.776239
[2022-12-11 08:02:52,505] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.718473
[2022-12-11 08:03:31,874] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.112936
[2022-12-11 08:03:32,286] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:03:32,346] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439801
[2022-12-11 08:03:45,733] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.237510
[2022-12-11 08:03:55,766] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.943155
[2022-12-11 08:03:55,767] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525698
[2022-12-11 08:03:55,767] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 08:03:56,018] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [96]/[120]





[2022-12-11 08:03:56,019] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	2.44140625e-07
[2022-12-11 08:03:56,019] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:03:56,185] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.755522
[2022-12-11 08:04:34,342] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.177533
[2022-12-11 08:05:13,049] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.782444
[2022-12-11 08:05:51,510] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.907793
[2022-12-11 08:06:30,166] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.238642
[2022-12-11 08:07:08,631] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.900636
[2022-12-11 08:07:46,875] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.911579
[2022-12-11 08:08:26,776] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.754273
[2022-12-11 08:08:27,184] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:08:27,244] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.445173
[2022-12-11 08:08:40,711] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.237364
[2022-12-11 08:08:50,809] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.930588
[2022-12-11 08:08:50,810] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.525600
[2022-12-11 08:08:50,810] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 08:08:51,058] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [97]/[120]





[2022-12-11 08:08:51,060] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	2.44140625e-07
[2022-12-11 08:08:51,061] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:08:51,226] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.742247
[2022-12-11 08:09:29,770] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.834356
[2022-12-11 08:10:09,510] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.191690
[2022-12-11 08:10:47,226] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.128576
[2022-12-11 08:11:25,326] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.856698
[2022-12-11 08:12:05,564] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.843880
[2022-12-11 08:12:45,669] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.015350
[2022-12-11 08:13:24,736] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.781082
[2022-12-11 08:13:25,136] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:13:25,196] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439957
[2022-12-11 08:13:38,982] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.236510
[2022-12-11 08:13:48,643] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.938263
[2022-12-11 08:13:48,644] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523997
[2022-12-11 08:13:48,645] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 08:13:48,891] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [98]/[120]





[2022-12-11 08:13:48,892] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	2.44140625e-07
[2022-12-11 08:13:48,893] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:13:49,209] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.304642
[2022-12-11 08:14:27,129] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.985299
[2022-12-11 08:15:05,232] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.072953
[2022-12-11 08:15:43,063] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.719185
[2022-12-11 08:16:24,137] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.853741
[2022-12-11 08:17:03,619] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.873849
[2022-12-11 08:17:42,750] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.107945
[2022-12-11 08:18:22,017] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.993139
[2022-12-11 08:18:22,477] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:18:22,536] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439976
[2022-12-11 08:18:35,634] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235003
[2022-12-11 08:18:45,485] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.937498
[2022-12-11 08:18:45,486] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523131
[2022-12-11 08:18:45,486] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 08:18:45,734] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [99]/[120]





[2022-12-11 08:18:45,735] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.220703125e-07
[2022-12-11 08:18:45,736] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:18:45,887] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.917205
[2022-12-11 08:19:23,653] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.794045
[2022-12-11 08:20:01,180] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.735653
[2022-12-11 08:20:39,487] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.176221
[2022-12-11 08:21:19,600] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.927649
[2022-12-11 08:21:59,810] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.145738
[2022-12-11 08:22:39,393] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.755054
[2022-12-11 08:23:18,403] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.751323
[2022-12-11 08:23:18,975] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:23:19,035] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.440011
[2022-12-11 08:23:32,210] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.234177
[2022-12-11 08:23:42,610] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.946831
[2022-12-11 08:23:42,611] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523594
[2022-12-11 08:23:42,612] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 08:23:42,860] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [100]/[120]
[2022-12-11 08:23:42,861] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.220703125e-07
[2022-12-11 08:23:42,862] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:23:43,025] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.763207
[2022-12-11 08:24:21,221] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.014331
[2022-12-11 08:24:59,158] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.891152
[2022-12-11 08:25:36,915] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.095493
[2022-12-11 08:26:17,088] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.131514
[2022-12-11 08:26:56,953] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.007191
[2022-12-11 08:27:36,046] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.166681
[2022-12-11 08:28:14,718] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.951619
[2022-12-11 08:28:15,176] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:28:15,238] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.440017
[2022-12-11 08:28:28,280] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.234426
[2022-12-11 08:28:38,096] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.937548
[2022-12-11 08:28:38,097] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523504
[2022-12-11 08:28:38,098] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 08:28:38,348] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [101]/[120]
[2022-12-11 08:28:38,349] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.220703125e-07
[2022-12-11 08:28:38,350] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:28:38,499] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.155215
[2022-12-11 08:29:16,481] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.816831
[2022-12-11 08:29:57,079] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.140667
[2022-12-11 08:30:36,043] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 2.196721
[2022-12-11 08:31:15,082] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.722321
[2022-12-11 08:31:54,533] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.722517
[2022-12-11 08:32:33,275] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.811808
[2022-12-11 08:33:12,197] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.808504
[2022-12-11 08:33:12,646] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:33:12,708] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.440145
[2022-12-11 08:33:26,254] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.234766
[2022-12-11 08:33:36,445] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.931647
[2022-12-11 08:33:36,446] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523441
[2022-12-11 08:33:36,446] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 08:33:36,697] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [102]/[120]





[2022-12-11 08:33:36,698] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.220703125e-07
[2022-12-11 08:33:36,699] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:33:36,863] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.282967
[2022-12-11 08:34:16,947] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.526085
[2022-12-11 08:34:56,552] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.819166
[2022-12-11 08:35:35,325] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.803305
[2022-12-11 08:36:13,288] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.891970
[2022-12-11 08:36:51,968] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.087214
[2022-12-11 08:37:29,043] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.868837
[2022-12-11 08:38:08,773] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.958781
[2022-12-11 08:38:09,225] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:38:09,281] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.440106
[2022-12-11 08:38:22,687] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.234910
[2022-12-11 08:38:32,715] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.934465
[2022-12-11 08:38:32,716] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522915
[2022-12-11 08:38:32,716] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 08:38:32,967] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [103]/[120]
[2022-12-11 08:38:32,967] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.220703125e-07
[2022-12-11 08:38:32,968] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:38:33,146] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.758073
[2022-12-11 08:39:12,015] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.589180
[2022-12-11 08:39:50,738] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.004385
[2022-12-11 08:40:28,717] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.800555
[2022-12-11 08:41:07,029] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.643972
[2022-12-11 08:41:45,373] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.200593
[2022-12-11 08:42:24,153] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.813254
[2022-12-11 08:43:03,387] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.825821
[2022-12-11 08:43:03,866] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:43:03,924] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.440006
[2022-12-11 08:43:17,344] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235758
[2022-12-11 08:43:27,426] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.929965
[2022-12-11 08:43:27,427] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523300
[2022-12-11 08:43:27,428] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 08:43:27,667] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [104]/[120]





[2022-12-11 08:43:27,668] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.220703125e-07
[2022-12-11 08:43:27,669] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:43:27,832] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.109694
[2022-12-11 08:44:06,164] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.824323
[2022-12-11 08:44:45,024] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.997644
[2022-12-11 08:45:23,908] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.824945
[2022-12-11 08:46:02,527] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.007755
[2022-12-11 08:46:40,601] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.820155
[2022-12-11 08:47:19,927] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.951263
[2022-12-11 08:47:58,264] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.757142
[2022-12-11 08:47:58,711] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:47:58,767] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439932
[2022-12-11 08:48:12,173] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235878
[2022-12-11 08:48:22,222] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.932551
[2022-12-11 08:48:22,223] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523621
[2022-12-11 08:48:22,223] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 08:48:22,478] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [105]/[120]
[2022-12-11 08:48:22,479] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.103515625e-08
[2022-12-11 08:48:22,479] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:48:22,652] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.843713
[2022-12-11 08:49:01,426] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.969826
[2022-12-11 08:49:39,964] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.828111
[2022-12-11 08:50:17,808] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.081331
[2022-12-11 08:50:58,117] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.169765
[2022-12-11 08:51:37,871] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.591759
[2022-12-11 08:52:16,098] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.017707
[2022-12-11 08:52:54,770] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.642490
[2022-12-11 08:52:55,219] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:52:55,275] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439951
[2022-12-11 08:53:08,527] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235927
[2022-12-11 08:53:18,481] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.929418
[2022-12-11 08:53:18,482] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523404
[2022-12-11 08:53:18,482] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 08:53:18,736] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [106]/[120]
[2022-12-11 08:53:18,737] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.103515625e-08
[2022-12-11 08:53:18,738] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:53:18,913] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.993222
[2022-12-11 08:53:57,103] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.763375
[2022-12-11 08:54:35,450] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.901575
[2022-12-11 08:55:14,255] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.905314
[2022-12-11 08:55:52,458] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.850694
[2022-12-11 08:56:30,619] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.793867
[2022-12-11 08:57:08,734] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.787886
[2022-12-11 08:57:47,170] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.199156
[2022-12-11 08:57:47,671] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 08:57:47,732] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439927
[2022-12-11 08:58:00,974] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235794
[2022-12-11 08:58:10,934] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.939050
[2022-12-11 08:58:10,935] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523539
[2022-12-11 08:58:10,936] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 08:58:11,186] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [107]/[120]
[2022-12-11 08:58:11,187] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.103515625e-08
[2022-12-11 08:58:11,188] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 08:58:11,354] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.735913
[2022-12-11 08:58:50,151] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.952627
[2022-12-11 08:59:27,743] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.680437
[2022-12-11 09:00:06,001] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.939370
[2022-12-11 09:00:43,971] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.068913
[2022-12-11 09:01:22,843] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.873671
[2022-12-11 09:02:01,324] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.088736
[2022-12-11 09:02:40,032] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.866672
[2022-12-11 09:02:40,461] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:02:40,525] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439909
[2022-12-11 09:02:53,784] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235851
[2022-12-11 09:03:03,731] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.932277
[2022-12-11 09:03:03,732] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523428
[2022-12-11 09:03:03,733] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 09:03:03,982] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [108]/[120]
[2022-12-11 09:03:03,983] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.103515625e-08
[2022-12-11 09:03:03,984] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:03:04,146] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.890099
[2022-12-11 09:03:42,783] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.679985
[2022-12-11 09:04:20,596] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.952204
[2022-12-11 09:04:59,252] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.804102
[2022-12-11 09:05:37,673] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.653419
[2022-12-11 09:06:16,567] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.170761
[2022-12-11 09:06:54,938] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.357107
[2022-12-11 09:07:32,690] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.000166
[2022-12-11 09:07:33,143] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:07:33,198] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439893
[2022-12-11 09:07:46,529] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235396
[2022-12-11 09:07:56,451] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.934185
[2022-12-11 09:07:56,452] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523480
[2022-12-11 09:07:56,453] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 09:07:56,701] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [109]/[120]





[2022-12-11 09:07:56,702] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.103515625e-08
[2022-12-11 09:07:56,703] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:07:56,869] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.759687
[2022-12-11 09:08:35,227] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.665969
[2022-12-11 09:09:13,966] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.106216
[2022-12-11 09:09:52,390] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.951830
[2022-12-11 09:10:30,413] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.860138
[2022-12-11 09:11:09,045] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.763567
[2022-12-11 09:11:46,722] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.090062
[2022-12-11 09:12:24,986] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.475419
[2022-12-11 09:12:25,440] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:12:25,501] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439787
[2022-12-11 09:12:38,765] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.236095
[2022-12-11 09:12:50,253] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.937864
[2022-12-11 09:12:50,254] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522888
[2022-12-11 09:12:50,255] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 09:12:50,505] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [110]/[120]





[2022-12-11 09:12:50,506] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	6.103515625e-08
[2022-12-11 09:12:50,507] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:12:50,672] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.817988
[2022-12-11 09:13:29,207] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.818506
[2022-12-11 09:14:09,549] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.835950
[2022-12-11 09:14:48,178] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.013121
[2022-12-11 09:15:27,238] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.097785
[2022-12-11 09:16:06,005] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.926000
[2022-12-11 09:16:45,351] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.959948
[2022-12-11 09:17:23,947] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.407837
[2022-12-11 09:17:24,423] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:17:24,483] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439745
[2022-12-11 09:17:37,970] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235418
[2022-12-11 09:17:48,119] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.943746
[2022-12-11 09:17:48,120] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522976
[2022-12-11 09:17:48,121] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 09:17:48,371] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [111]/[120]





[2022-12-11 09:17:48,372] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.0517578125e-08
[2022-12-11 09:17:48,373] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:17:48,505] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.030118
[2022-12-11 09:18:27,340] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.894384
[2022-12-11 09:19:07,186] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.336227
[2022-12-11 09:19:46,245] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 1.217448
[2022-12-11 09:20:25,586] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.075004
[2022-12-11 09:21:05,359] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.922525
[2022-12-11 09:21:43,979] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.637185
[2022-12-11 09:22:22,089] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.136929
[2022-12-11 09:22:22,554] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:22:22,612] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439717
[2022-12-11 09:22:35,974] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235425
[2022-12-11 09:22:46,020] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.936157
[2022-12-11 09:22:46,021] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522947
[2022-12-11 09:22:46,021] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 09:22:46,271] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [112]/[120]





[2022-12-11 09:22:46,271] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.0517578125e-08
[2022-12-11 09:22:46,272] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:22:46,435] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.039786
[2022-12-11 09:23:25,180] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.010469
[2022-12-11 09:24:03,499] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.169198
[2022-12-11 09:24:41,610] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.888646
[2022-12-11 09:25:19,843] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.603059
[2022-12-11 09:25:58,857] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.377334
[2022-12-11 09:26:37,124] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.833034
[2022-12-11 09:27:16,089] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.853693
[2022-12-11 09:27:16,529] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:27:16,585] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439733
[2022-12-11 09:27:29,960] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.234921
[2022-12-11 09:27:40,017] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.946158
[2022-12-11 09:27:40,017] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523029
[2022-12-11 09:27:40,018] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 09:27:40,270] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [113]/[120]
[2022-12-11 09:27:40,271] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.0517578125e-08
[2022-12-11 09:27:40,272] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:27:40,485] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.886707
[2022-12-11 09:28:20,023] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.789881
[2022-12-11 09:28:58,083] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.227282
[2022-12-11 09:29:36,728] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.837777
[2022-12-11 09:30:14,986] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.758116
[2022-12-11 09:30:53,922] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.613611
[2022-12-11 09:31:32,090] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.983959
[2022-12-11 09:32:10,460] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.788229
[2022-12-11 09:32:10,939] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:32:10,996] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439708
[2022-12-11 09:32:24,365] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235011
[2022-12-11 09:32:34,384] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.937266
[2022-12-11 09:32:34,386] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522867
[2022-12-11 09:32:34,386] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 09:32:34,636] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [114]/[120]





[2022-12-11 09:32:34,638] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.0517578125e-08
[2022-12-11 09:32:34,638] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:32:34,797] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.994358
[2022-12-11 09:33:13,799] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.184855
[2022-12-11 09:33:52,254] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 1.218149
[2022-12-11 09:34:30,187] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.858857
[2022-12-11 09:35:08,838] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.005558
[2022-12-11 09:35:47,178] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.874749
[2022-12-11 09:36:25,822] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 1.885656
[2022-12-11 09:37:04,398] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.895819
[2022-12-11 09:37:04,839] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:37:04,895] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439677
[2022-12-11 09:37:18,279] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235000
[2022-12-11 09:37:28,340] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.940938
[2022-12-11 09:37:28,341] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522891
[2022-12-11 09:37:28,341] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 09:37:28,578] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [115]/[120]
[2022-12-11 09:37:28,579] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.0517578125e-08
[2022-12-11 09:37:28,580] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:37:28,821] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.564596
[2022-12-11 09:38:07,628] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.804373
[2022-12-11 09:38:46,314] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.745998
[2022-12-11 09:39:24,622] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.740682
[2022-12-11 09:40:02,955] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.915682
[2022-12-11 09:40:41,569] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.026609
[2022-12-11 09:41:20,307] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.704995
[2022-12-11 09:41:58,105] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.823047
[2022-12-11 09:41:58,545] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:41:58,607] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439669
[2022-12-11 09:42:11,990] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235160
[2022-12-11 09:42:22,012] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.926044
[2022-12-11 09:42:22,013] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522746
[2022-12-11 09:42:22,014] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 09:42:22,265] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [116]/[120]
[2022-12-11 09:42:22,266] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	3.0517578125e-08
[2022-12-11 09:42:22,266] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:42:22,449] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 1.125047
[2022-12-11 09:43:01,530] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.888969
[2022-12-11 09:43:40,106] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.819391
[2022-12-11 09:44:18,558] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.726840
[2022-12-11 09:44:56,951] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.048349
[2022-12-11 09:45:35,582] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 1.259939
[2022-12-11 09:46:14,733] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.795965
[2022-12-11 09:46:53,811] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.979557
[2022-12-11 09:46:54,269] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:46:54,326] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439652
[2022-12-11 09:47:07,711] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235147
[2022-12-11 09:47:17,737] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.940599
[2022-12-11 09:47:17,738] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522903
[2022-12-11 09:47:17,738] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 09:47:17,989] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [117]/[120]
[2022-12-11 09:47:17,990] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.52587890625e-08
[2022-12-11 09:47:17,991] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:47:18,209] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.670224
[2022-12-11 09:47:57,259] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.888557
[2022-12-11 09:48:35,648] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.826319
[2022-12-11 09:49:14,934] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.849429
[2022-12-11 09:49:54,451] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 2.311510
[2022-12-11 09:50:33,400] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.714950
[2022-12-11 09:51:12,344] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.857178
[2022-12-11 09:51:51,104] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.963221
[2022-12-11 09:51:51,662] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:51:51,717] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439659
[2022-12-11 09:52:05,293] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235087
[2022-12-11 09:52:15,459] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.932669
[2022-12-11 09:52:15,460] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522973
[2022-12-11 09:52:15,461] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 09:52:15,711] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [118]/[120]





[2022-12-11 09:52:15,712] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.52587890625e-08
[2022-12-11 09:52:15,712] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:52:15,901] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.737584
[2022-12-11 09:52:53,878] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.685893
[2022-12-11 09:53:32,771] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.777075
[2022-12-11 09:54:12,236] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.813399
[2022-12-11 09:54:51,013] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 0.712354
[2022-12-11 09:55:29,792] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.859052
[2022-12-11 09:56:08,619] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.887819
[2022-12-11 09:56:48,052] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.868598
[2022-12-11 09:56:48,461] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 09:56:48,521] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439659
[2022-12-11 09:57:02,115] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235102
[2022-12-11 09:57:12,293] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.934327
[2022-12-11 09:57:12,294] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522986
[2022-12-11 09:57:12,294] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory
[2022-12-11 09:57:12,546] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [119]/[120]





[2022-12-11 09:57:12,547] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.52587890625e-08
[2022-12-11 09:57:12,548] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 09:57:12,762] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.758204
[2022-12-11 09:57:52,005] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 1.364741
[2022-12-11 09:58:29,730] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.665444
[2022-12-11 09:59:07,517] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.827240
[2022-12-11 09:59:47,960] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.081047
[2022-12-11 10:00:26,734] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.703269
[2022-12-11 10:01:05,156] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.951981
[2022-12-11 10:01:44,090] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 0.933009
[2022-12-11 10:01:44,540] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 10:01:44,594] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439657
[2022-12-11 10:01:57,972] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235111
[2022-12-11 10:02:08,058] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.925966
[2022-12-11 10:02:08,059] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.523054
[2022-12-11 10:02:08,059] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





[2022-12-11 10:02:08,339] {<ipython-input-12-cb52601f4b8e>:36} INFO - Epoch [120]/[120]
[2022-12-11 10:02:08,340] {<ipython-input-12-cb52601f4b8e>:39} INFO - LR:	1.52587890625e-08
[2022-12-11 10:02:08,340] {<ipython-input-12-cb52601f4b8e>:71} INFO - Train loop


HBox(children=(FloatProgress(value=0.0, max=2105.0), HTML(value='')))

[2022-12-11 10:02:08,522] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [0]/[2105] loss: 0.985007
[2022-12-11 10:02:48,224] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [300]/[2105] loss: 0.677141
[2022-12-11 10:03:27,897] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [600]/[2105] loss: 0.751553
[2022-12-11 10:04:06,710] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [900]/[2105] loss: 0.952548
[2022-12-11 10:04:44,776] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1200]/[2105] loss: 1.109790
[2022-12-11 10:05:23,141] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1500]/[2105] loss: 0.794390
[2022-12-11 10:06:02,369] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [1800]/[2105] loss: 0.870354
[2022-12-11 10:06:41,767] {<ipython-input-12-cb52601f4b8e>:126} INFO - Iteration [2100]/[2105] loss: 1.082895
[2022-12-11 10:06:42,195] {<ipython-input-12-cb52601f4b8e>:130} INFO - Eval loop





HBox(children=(FloatProgress(value=0.0, max=527.0), HTML(value='')))

[2022-12-11 10:06:42,259] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [0]/[527] loss: 2.439669
[2022-12-11 10:06:55,746] {<ipython-input-12-cb52601f4b8e>:163} INFO - Iteration [300]/[527] loss: 2.235041
[2022-12-11 10:07:05,874] {<ipython-input-12-cb52601f4b8e>:57} INFO - Train loss avg:	0.935838
[2022-12-11 10:07:05,875] {<ipython-input-12-cb52601f4b8e>:58} INFO - Eval loss avg:	2.522917
[2022-12-11 10:07:05,876] {<ipython-input-12-cb52601f4b8e>:64} INFO - Clean up CUDA memory





In [14]:
# kill tensorboard
# taskkill /IM "tensorboard.exe" /F

**(2 points)** Write a translation function with sampling from a distribution with temperature.

In [15]:
encoder, decoder = en2ru_train.encoder, en2ru_train.decoder
encoder.eval()
decoder.eval()
encoder = encoder.to("cpu")
decoder = decoder.to("cpu")

In [26]:
def evaluate(model: Tuple[nn.Module], sentence: str, eng2idx: Dict[str, int], idx2rus: Dict[str, int], 
             temp: Union[int, float] = 1.0, max_seq_length: int = 20, batch_size: int = 10, 
             sos_token: int = 1) -> None:
    encoder, decoder = model
    
    # encode sentence to token sequence
    encoded = sentence2idx(seq=sentence, tok2idx=eng2idx)
    print(encoded)
    
    output = []
    with torch.no_grad():
        # repeat encoded sentence to batch_size times
        z = torch.LongTensor(encoded).view(1, -1).repeat(batch_size, 1)
        
        # feed forward throught encoder
        encoder_outputs, hidden = encoder(z, encoder.init_hidden(batch_size=batch_size))
        
        # iterate over decoder till you will achive max sequence length
        decoder_input = torch.full(size=(batch_size, 1), fill_value=sos_token)
        for i in range(max_seq_length):
            decoder_outputs, hidden = decoder(decoder_input, hidden)
            decoder_input = F.softmax(decoder_outputs / temp, dim=2)
            decoder_input = decoder_input.argmax(dim=2)
            output.append(decoder_input.numpy())
    
    output = np.concatenate(output, axis=1).T
    for s in output:
        out = idx2sentence(s, idx2rus)
        print(out.replace("PAD", ""))

    
evaluate(model=(encoder, decoder), sentence="What is going on?", eng2idx=eng2idx, idx2rus=idx2rus)

[1, 23, 14, 63, 46, 9, 2]
что что что что что что что что что что
происходит происходит происходит происходит происходит происходит происходит происходит происходит происходит
? ? ? ? ? ? ? ? ? ?
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
EOS EOS EOS EOS EOS EOS EOS EOS EOS EOS
