In [1]:
from google.colab import drive
import os
drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/PMLDL/AssignmentIII')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install youtokentome

import os
import math
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
from torch.nn import Transformer
from torch.nn.utils.rnn import pad_sequence
import torch.nn.functional as F
import youtokentome as yttm
from functools import partial
from nltk.translate.bleu_score import corpus_bleu
from tqdm import tqdm



In [3]:
DATA_PATH = './'
DATASET_PATH = DATA_PATH + 'corpus.en_ru.1m'
VOCAB_SIZE = 2 ** 15
TEMP_FILE_PATH = 'tokenizer_text.temp'
TOKENIZER_PATH = f'{DATASET_PATH}_v{VOCAB_SIZE}.tokenizer'
DATASET_SIZE = 10000

PAD_TOKEN = 0
UNK_TOKEN = 1
BOS_TOKEN = 2
EOS_TOKEN = 3

N_EPOCH = 20
BATCH_SIZE = 16
LEARNING_RATE = 0.01
EMB_SIZE = 512
N_HEAD = 8
N_HID = 2048
N_LAYERS = 6
DROPOUT = 0.1
MODEL_SAVE_PATH = './saved-models'
SAVED_MODEL = None

if MODEL_SAVE_PATH:
    os.makedirs(MODEL_SAVE_PATH, exist_ok=True)

In [4]:
print('Loading data...')
data_en, data_ru = [], []
with open(DATASET_PATH + '.en', encoding='utf8') as in_file:
    data_en.extend(in_file.readlines())
with open(DATASET_PATH + '.ru', encoding='utf8') as in_file:
    data_ru.extend(in_file.readlines())
raw_data = {'English': [line for line in data_en], 'Russian': [line for line in data_ru]}
df = pd.DataFrame(raw_data, columns=list(raw_data.keys()))
# Limit the dataset size.
if DATASET_SIZE:
    df = df[:DATASET_SIZE]

print('Creating train, test, val sets...')
train, test = train_test_split(df, test_size=0.2)
test, val = train_test_split(test, test_size=0.5)
train.to_csv(os.path.join(DATA_PATH, 'train.csv'), index=False)
test.to_csv(os.path.join(DATA_PATH, 'test.csv'), index=False)
val.to_csv(os.path.join(DATA_PATH, 'val.csv'), index=False)

print('Creating tokenizer...')
with open(TEMP_FILE_PATH, 'w', encoding='utf8') as out_file:
    out_file.write('\n'.join(map(str.lower, data_en)))
    out_file.write('\n'.join(map(str.lower, data_ru)))
# Train tokenizer.
tokenizer = yttm.BPE.train(data=TEMP_FILE_PATH, vocab_size=VOCAB_SIZE, model=TOKENIZER_PATH,
                           pad_id=PAD_TOKEN, unk_id=UNK_TOKEN, bos_id=BOS_TOKEN, eos_id=EOS_TOKEN)
# Delete temp file.
os.remove(TEMP_FILE_PATH)
print('Finished!')

Loading data...
Creating train, test, val sets...
Creating tokenizer...
Finished!


In [5]:
class TextDataset(Dataset):
    output_types = {'id': yttm.OutputType.ID, 'subword': yttm.OutputType.SUBWORD}

    def __init__(self, csv_file, tokenizer, max_len=50, max_len_ratio=1.5):
        self.tokenizer = tokenizer
        df = pd.read_csv(csv_file)
        # Tokenize sentences using tokenizer.
        tokenize_lambda = lambda x: self.tokenize(x.lower().strip(), 'subword')
        df['eng_enc'] = df.English.apply(tokenize_lambda)
        df['rus_enc'] = df.Russian.apply(tokenize_lambda)
        # Delete sentences that exceed the max length and max length ratio.
        df['en_len'] = df['eng_enc'].str.len()
        df['ru_len'] = df['rus_enc'].str.len()
        df.query(f'ru_len < {max_len} & en_len < {max_len}', inplace=True)
        df.query(f'ru_len < en_len * {max_len_ratio} & ru_len * {max_len_ratio} > en_len', inplace=True)
        # Sort the values for less padding in batching.
        df.sort_values(['ru_len', 'en_len'], ascending=[False, False], inplace=True)
        raw_src, raw_tgt = zip(df[['Russian', 'English']].T.values)
        src, tgt = zip(df[['rus_enc', 'eng_enc']].T.values)
        self.tgt, self.src = tgt[0], src[0]
        self.raw_src, self.raw_tgt = raw_src[0], raw_tgt[0]

    def tokenize(self, s, output_type='id'):
        """Tokenize the sentence.
        :param s: the sentence to tokenize
        :param output_type: either 'id' or 'subword' for corresponding output
        :return: tokenized sentence"""
        return self.tokenizer.encode(s, output_type=self.output_types[output_type],
                                     bos=True, eos=True)

    def decode(self, tokens):
        return self.tokenizer.id_to_subword(tokens)

    def __len__(self):
        return len(self.src)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        src = self.src[idx]
        src = [self.tokenizer.subword_to_id(token) for token in src]
        tgt = self.tgt[idx]
        tgt = [self.tokenizer.subword_to_id(token) for token in tgt]
        return src, tgt

In [6]:
def load_datasets(path, tokenizer, ext='.csv'):
    res = []
    for name in ['train', 'val', 'test']:
        dataset_path = os.path.join(path, name + ext)
        res.append(TextDataset(dataset_path, tokenizer))
    return res


def make_dataloaders(datasets, batch_size, pad_token, num_workers=0):
    res = []
    for dataset in datasets:
        res.append(
            DataLoader(
                dataset, batch_size=batch_size,
                shuffle=False, num_workers=num_workers,
                collate_fn=partial(my_collate, pad_token=pad_token)
            )
        )
    return res


def my_collate(batch, pad_token=0):
    src, tgt = zip(*batch)
    src = [Tensor(s) for s in src]
    tgt = [Tensor(t) for t in tgt]
    src = pad_sequence(src, batch_first=True, padding_value=pad_token).long()
    tgt = pad_sequence(tgt, batch_first=True, padding_value=pad_token).long()
    return src.t(), tgt.t()

In [7]:
class TransformerModel(nn.Module):

    def __init__(self, ntokens_src, ntokens_tgt, ninp, nhead, dim_feedforward,
                 nlayers, pad_token, dropout=0.5):
        super(TransformerModel, self).__init__()
        self.model_type = 'Transformer'
        self.ninp = ninp
        self.pad_token = pad_token
        self.masks = {
            'src': None,
            'tgt': None,
            'memory': None,
        }
        # Token Encoders
        self.src_encoder = nn.Embedding(ntokens_src, ninp)
        self.tgt_encoder = nn.Embedding(ntokens_tgt, ninp)
        # Positional Encoding
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        # Transformer
        self.transformer = Transformer(
            d_model=ninp,
            nhead=nhead,
            num_encoder_layers=nlayers,
            num_decoder_layers=nlayers,
            dropout=dropout,
            dim_feedforward=dim_feedforward,
        )
        self.out = nn.Linear(ninp, ntokens_tgt)

        self.init_weights()

    def generate_square_subsequent_mask(self, sx, sy=None):
        """Generate matrix for seqential reveal of tokens."""
        sy = sy or sx
        mask = (torch.triu(torch.ones((sx, sy))) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        self.src_encoder.weight.data.uniform_(-initrange, initrange)
        self.tgt_encoder.weight.data.uniform_(-initrange, initrange)
        self.out.bias.data.zero_()
        self.out.weight.data.uniform_(-initrange, initrange)

    def preprocess(self, x, x_type):
        # Create masks
        padding_mask = (x == self.pad_token).bool().t()
        if self.masks[x_type] is None or self.masks[x_type].size(0) != len(x):
            self.masks[x_type] = self.generate_square_subsequent_mask(len(x), len(x)).to(x.device)

        x_enc = self.src_encoder(x) if x_type == 'src' else self.tgt_encoder(x)
        x_enc *= math.sqrt(self.ninp)
        x_enc = self.pos_encoder(x_enc)

        return x_enc, self.masks[x_type], padding_mask

    def forward(self, src, tgt):
        if (self.masks['memory'] is None or
                self.masks['src'].size(0) != len(src) or
                self.masks['tgt'].size(0) != len(tgt)):
            self.masks['memory'] = self.generate_square_subsequent_mask(len(src), len(tgt)).to(src.device)

        src_enc, _, src_key_padding_mask = self.preprocess(src, 'src')
        tgt_enc, _, tgt_key_padding_mask = self.preprocess(tgt, 'tgt')
        memory_key_padding_mask = src_key_padding_mask.clone().detach()

        output = self.transformer(src_enc, tgt_enc,
                                  src_mask=self.masks['src'],
                                  tgt_mask=self.masks['tgt'],
                                  memory_mask=self.masks['memory'],
                                  src_key_padding_mask=src_key_padding_mask,
                                  tgt_key_padding_mask=tgt_key_padding_mask,
                                  memory_key_padding_mask=memory_key_padding_mask,
                                  )
        output = self.out(output)
        return output


class PositionalEncoding(nn.Module):
    # From https://pytorch.org/tutorials/beginner/transformer_tutorial.html

    def __init__(self, d_model, dropout=0.1):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.d_model = d_model

    def create_pe(self, seq_len):
        pe = torch.zeros(seq_len, self.d_model)
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.d_model, 2).float() * (-math.log(10000.0) / self.d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        return pe

    def forward(self, x):
        pe = self.create_pe(x.size(0))
        x = x + pe.to(x.device)
        return self.dropout(x)

In [8]:
def run_model(model, criterion, optimizer, data_iterator, is_train_phase, desc=''):
    """Run one epoch of a model with given data.
    
    :param model: model to run on
    :param criterion: criterion to use
    :param optimizer: optimizer to use
    :param data_iterator: iterator of (x, y) data tuples
    :param is_train_phase: True if you want to train
    :param desc: description for tqdm bar
    :return: epoch loss
    """
    if is_train_phase:
        model.train()  # Turn on the train mode
    else:
        model.eval()  # Turn on the evaluation mode
    total_loss = 0.0
    pbar = tqdm(total=len(data_iterator), desc=desc)
    for i, (src, tgt) in enumerate(data_iterator):
        src, tgt = src.to(device), tgt.to(device)

        optimizer.zero_grad()
        tgt_in = tgt[:-1]
        tgt_out = tgt[1:]

        with torch.set_grad_enabled(is_train_phase):
            output = model(src, tgt_in).transpose(1, 2)
            loss = criterion(output, tgt_out)

            if is_train_phase:
                loss.backward()
                # Clip gradient to deal with gradient explosion
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
                optimizer.step()
        total_loss += loss.item()
        pbar.update(1)
        pbar.set_description(desc + f' - loss: {total_loss / (i + 1):7.4}')
    pbar.close()
    return total_loss / (i + 1)

In [9]:
def train_model(model, n_epochs, data_iterators, criterion, optimizer,
                scheduler=None, model_save_path=None):
    losses = {'train': [],
              'val': []}

    for epoch in range(n_epochs):
        lr = optimizer.state_dict()['param_groups'][0]['lr']
        tqdm.write(f'------------ Epoch {epoch}; lr: {lr:.5f} ------------')
        for phase in ['train', 'val']:
            desc = f'{phase.title()} Epoch #{epoch}'
            epoch_loss = run_model(model, criterion, optimizer, data_iterators[phase], phase == 'train', desc)
            losses[phase].append(epoch_loss)
            print_hist = lambda l: ' -> '.join(map(lambda x: f"{x:.4}", l[-2:]))
            tqdm.write(f'{phase.title()} Loss: {print_hist(losses[phase])}')
        tqdm.write('Saving model...')
        if model_save_path:
            try:
                torch.save(model, os.path.join(model_save_path, f"epoch{str(epoch)}-loss{losses['val'][-1]}.pt"))
                tqdm.write('Saved successfully')
            except FileNotFoundError:
                tqdm.write('Error during saving!')
        try:
            translate(model, tokenizer, 'Обучение машинное, а учимся мы.', verbose=True)
            rand_ind = np.random.randint(0, len(data_iterators['test']))
            translate(model, tokenizer, data_iterators['test'].raw_src[rand_ind], verbose=True)
        except:
            tqdm.write('Error while translation.')
        if scheduler:
            scheduler.step()
    return losses

In [10]:
def tokens_to_str(tokenizer, tokens):
    return subword_to_str([tokenizer.id_to_subword(ix) for ix in tokens])


def subword_to_str(tokens):
    return ''.join(tokens).replace('▁', ' ')


def translate(model, tokenizer, text, max_len=80, verbose=False):
    model.eval()
    # Get the device the model is stored on.
    device = next(model.parameters()).device
    # Preprocess the text.
    text = text.strip().lower()

    if verbose:
        print('------------ Translation ------------')
        print(f'Input: {text}')
    # Prepare text
    src = tokenizer.encode(text, output_type=yttm.OutputType.ID,
                           bos=True, eos=True)
    src = Tensor(src).long().to(device)
    # Run encoder
    src_enc, src_mask, _ = model.preprocess(src, 'src')
    e_outputs = model.transformer.encoder(src_enc, src_mask)

    # Prepare tensor for answers
    outputs = torch.zeros(max_len).type_as(src.data)
    # Set the first token as '<BOS>'
    outputs[0] = torch.LongTensor([BOS_TOKEN])
    for i in range(1, max_len):
        outputs_enc, tgt_mask, _ = model.preprocess(outputs[:i].unsqueeze(1), 'tgt')
        d_out = model.transformer.decoder(outputs_enc, e_outputs, tgt_mask=tgt_mask)
        out = model.out(d_out)
        out = F.softmax(out, dim=-1)
        val, ix = out.data.topk(3, dim=-1)
        outputs[i] = ix[-1][0][0]
        if outputs[i] == EOS_TOKEN:
            break
    result = tokens_to_str(tokenizer, outputs[:i + 1])
    if verbose:
        print('Output weights: ')
        for j in range(min(3, i)):
            print(f'{j}', {tokenizer.id_to_subword(k): v.item() for k, v in zip(ix[j][0], val[j][0])})
        print(f'Translation: {result}')
    return result

In [11]:
def calc_BLEU(model, data):
    originals, translations = [], []
    pbar = tqdm(total=len(data), desc='Test BLEU score')
    for raw_src, raw_tgt in zip(data.raw_src, data.raw_tgt):
        originals.append([raw_tgt])
        translation = translate(model, tokenizer, raw_src)
        translation = translation.replace('<BOS>', '').replace('<EOS>', '')
        translations.append(translation)
        pbar.update(1)
    pbar.close()
    score = corpus_bleu(originals, translations, weights=(0.25, 0.25, 0.25, 0.25))
    print(f'Test BLEU score - {score:.4f}')
    return (score, originals, translations)

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load tokenizer.
tokenizer = yttm.BPE(model=TOKENIZER_PATH)

ntokens_src = tokenizer.vocab_size()
ntokens_tgt = tokenizer.vocab_size()

# Load data.
print('Loading data...')
train_data, val_data, test_data = load_datasets(DATA_PATH, tokenizer)
print('Train set len:', len(train_data),
      '\nVal set len:', len(val_data),
      '\nTest set len:', len(test_data))

# Make dataloaders.
print('Making dataloaders...')
(train_iterator, val_iterator) = make_dataloaders([train_data, val_data],
                                                  batch_size=BATCH_SIZE,
                                                  pad_token=PAD_TOKEN,
                                                  num_workers=0)

data_iterators = {
    'train': train_iterator,
    'val': val_iterator,
    'test': test_data,
}

if SAVED_MODEL:
    print('Loading saved model...')
    model = torch.load(os.path.join(MODEL_SAVE_PATH, SAVED_MODEL))
else:
    print('Creating model...')
    model = TransformerModel(ntokens_src, ntokens_tgt,
                             EMB_SIZE, N_HEAD, N_HID,
                             N_LAYERS, PAD_TOKEN,
                             DROPOUT).to(device)

# Ignore padding index during the loss computation.
criterion = nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE,
                             betas=(0.9, 0.98), eps=10e-09)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, gamma=0.95)
losses = train_model(model, N_EPOCH, data_iterators,
                     criterion, optimizer, scheduler, MODEL_SAVE_PATH)

Loading data...
Train set len: 6611 
Val set len: 840 
Test set len: 850
Making dataloaders...
Creating model...


Train Epoch #0 - loss:   11.44:   0%|          | 1/414 [00:00<00:57,  7.17it/s]

------------ Epoch 0; lr: 0.01000 ------------


Train Epoch #0 - loss:   7.786: 100%|██████████| 414/414 [00:36<00:00, 11.43it/s]
Val Epoch #0 - loss:   8.073:  11%|█▏        | 6/53 [00:00<00:01, 33.41it/s]

Train Loss: 7.786


Val Epoch #0 - loss:    7.73: 100%|██████████| 53/53 [00:01<00:00, 35.62it/s]


Val Loss: 7.73
Saving model...


Train Epoch #1 - loss:   8.142:   0%|          | 1/414 [00:00<00:45,  9.14it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.2740316390991211, '▁the': 0.02477779984474182, '.': 0.017958257347345352}
Translation: <BOS><EOS>
------------ Translation ------------
Input: при некоторых типах онкологической патологии и специфического лечения, отмечалась задержка роста, однако дети восполняли эту задержку после рождения.
Output weights: 
0 {'<EOS>': 0.2740316390991211, '▁the': 0.02477779984474182, '.': 0.017958257347345352}
Translation: <BOS><EOS>
------------ Epoch 1; lr: 0.01000 ------------


Train Epoch #1 - loss:   7.296: 100%|██████████| 414/414 [00:36<00:00, 11.28it/s]
Val Epoch #1 - loss:   8.414:  11%|█▏        | 6/53 [00:00<00:01, 32.53it/s]

Train Loss: 7.786 -> 7.296


Val Epoch #1 - loss:   8.117: 100%|██████████| 53/53 [00:01<00:00, 35.87it/s]


Val Loss: 7.73 -> 8.117
Saving model...


Train Epoch #2 - loss:   8.147:   0%|          | 1/414 [00:00<00:46,  8.84it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.07956637442111969, '▁the': 0.015011890791356564, '.': 0.014645989052951336}
Translation: <BOS><EOS>
------------ Translation ------------
Input: кроме того, если кризис на финансовых рынках продолжится, что будет сопровождаться резким падением в реальном секторе экономики, то ес может оказаться под угрозой, если не сможет быстро принять решение на политическом уровне.
Output weights: 
0 {'<EOS>': 0.07956637442111969, '▁the': 0.015011894516646862, '.': 0.014645989052951336}
Translation: <BOS><EOS>
------------ Epoch 2; lr: 0.01000 ------------


Train Epoch #2 - loss:   7.299: 100%|██████████| 414/414 [00:36<00:00, 11.40it/s]
Val Epoch #2 - loss:   8.464:  11%|█▏        | 6/53 [00:00<00:01, 33.13it/s]

Train Loss: 7.296 -> 7.299


Val Epoch #2 - loss:    8.11: 100%|██████████| 53/53 [00:01<00:00, 37.52it/s]


Val Loss: 8.117 -> 8.11
Saving model...


Train Epoch #3 - loss:   8.164:   0%|          | 1/414 [00:00<00:45,  9.12it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.14229699969291687, '▁the': 0.01351667009294033, '.': 0.012536408379673958}
Translation: <BOS><EOS>
------------ Translation ------------
Input: в ступке вдруг вспыхнуло яркое пламя, повалил густой розовый дым и завоняло жжеными перьями и серой.
Output weights: 
0 {'<EOS>': 0.14229705929756165, '▁the': 0.013516669161617756, '.': 0.012536408379673958}
Translation: <BOS><EOS>
------------ Epoch 3; lr: 0.00950 ------------


Train Epoch #3 - loss:   7.268: 100%|██████████| 414/414 [00:37<00:00, 11.07it/s]
Val Epoch #3 - loss:    8.43:  11%|█▏        | 6/53 [00:00<00:01, 32.71it/s]

Train Loss: 7.299 -> 7.268


Val Epoch #3 - loss:   8.077: 100%|██████████| 53/53 [00:01<00:00, 34.92it/s]


Val Loss: 8.11 -> 8.077
Saving model...


Train Epoch #4 - loss:   8.126:   0%|          | 1/414 [00:00<00:44,  9.29it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.13780032098293304, '▁the': 0.018183130770921707, '.': 0.01469822321087122}
Translation: <BOS><EOS>
------------ Translation ------------
Input: сегодня микроволновая печь революционизировала способ приготовления американской еды.
Output weights: 
0 {'<EOS>': 0.13780032098293304, '▁the': 0.018183130770921707, '.': 0.01469822321087122}
Translation: <BOS><EOS>
------------ Epoch 4; lr: 0.00950 ------------


Train Epoch #4 - loss:   7.253: 100%|██████████| 414/414 [00:36<00:00, 11.21it/s]
Val Epoch #4 - loss:    8.49:  11%|█▏        | 6/53 [00:00<00:01, 32.66it/s]

Train Loss: 7.268 -> 7.253


Val Epoch #4 - loss:   8.112: 100%|██████████| 53/53 [00:01<00:00, 35.74it/s]


Val Loss: 8.077 -> 8.112
Saving model...


Train Epoch #5 - loss:   8.163:   0%|          | 1/414 [00:00<00:46,  8.91it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.16117136180400848, '▁the': 0.022421615198254585, '.': 0.0172676220536232}
Translation: <BOS><EOS>
------------ Translation ------------
Input: следуйте их рекомендациям.
Output weights: 
0 {'<EOS>': 0.16117143630981445, '▁the': 0.022421613335609436, '.': 0.0172676220536232}
Translation: <BOS><EOS>
------------ Epoch 5; lr: 0.00950 ------------


Train Epoch #5 - loss:   7.244: 100%|██████████| 414/414 [00:36<00:00, 11.22it/s]
Val Epoch #5 - loss:   8.503:  11%|█▏        | 6/53 [00:00<00:01, 33.52it/s]

Train Loss: 7.253 -> 7.244


Val Epoch #5 - loss:   8.154: 100%|██████████| 53/53 [00:01<00:00, 33.35it/s]


Val Loss: 8.112 -> 8.154
Saving model...


Train Epoch #6 - loss:   8.125:   0%|          | 1/414 [00:00<00:46,  8.79it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.11054738610982895, '.': 0.009801654145121574, '▁the': 0.009596781805157661}
Translation: <BOS><EOS>
------------ Translation ------------
Input: следующие из отсутствующих заголовков новостей, касаются перемещения граждан:
Output weights: 
0 {'<EOS>': 0.11054738610982895, '.': 0.009801654145121574, '▁the': 0.009596781805157661}
Translation: <BOS><EOS>
------------ Epoch 6; lr: 0.00903 ------------


Train Epoch #6 - loss:   7.237: 100%|██████████| 414/414 [00:36<00:00, 11.27it/s]
Val Epoch #6 - loss:   8.477:  11%|█▏        | 6/53 [00:00<00:01, 32.77it/s]

Train Loss: 7.244 -> 7.237


Val Epoch #6 - loss:   8.103: 100%|██████████| 53/53 [00:01<00:00, 35.40it/s]


Val Loss: 8.154 -> 8.103
Saving model...


Train Epoch #7 - loss:   8.143:   0%|          | 1/414 [00:00<00:45,  9.15it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.2296648472547531, '▁the': 0.022821009159088135, '.': 0.016698328778147697}
Translation: <BOS><EOS>
------------ Translation ------------
Input: послушайте, настанет день, когда не нужно будет задаваться этим вопросом; вы просто сможете сказать: "я думаю, что отправлюсь туда и узнаю это."
Output weights: 
0 {'<EOS>': 0.2296648472547531, '▁the': 0.022821009159088135, '.': 0.016698328778147697}
Translation: <BOS><EOS>
------------ Epoch 7; lr: 0.00903 ------------


Train Epoch #7 - loss:   7.234: 100%|██████████| 414/414 [00:37<00:00, 11.12it/s]
Val Epoch #7 - loss:   8.507:  11%|█▏        | 6/53 [00:00<00:01, 32.71it/s]

Train Loss: 7.237 -> 7.234


Val Epoch #7 - loss:   8.135: 100%|██████████| 53/53 [00:01<00:00, 36.02it/s]


Val Loss: 8.103 -> 8.135
Saving model...


Train Epoch #8 - loss:   8.111:   0%|          | 1/414 [00:00<00:46,  8.81it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.21267163753509521, '▁the': 0.01597651094198227, '.': 0.015934249386191368}
Translation: <BOS><EOS>
------------ Translation ------------
Input: я верю, что мы будем жить в демократическом государстве.
Output weights: 
0 {'<EOS>': 0.21267163753509521, '▁the': 0.01597651094198227, '.': 0.015934249386191368}
Translation: <BOS><EOS>
------------ Epoch 8; lr: 0.00903 ------------


Train Epoch #8 - loss:   7.236: 100%|██████████| 414/414 [00:37<00:00, 11.16it/s]
Val Epoch #8 - loss:   8.411:  11%|█▏        | 6/53 [00:00<00:01, 32.31it/s]

Train Loss: 7.234 -> 7.236


Val Epoch #8 - loss:   8.051: 100%|██████████| 53/53 [00:01<00:00, 36.57it/s]


Val Loss: 8.135 -> 8.051
Saving model...


Train Epoch #9 - loss:   8.069:   0%|          | 1/414 [00:00<00:55,  7.40it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.1519080400466919, '.': 0.0209706611931324, '▁the': 0.017373694106936455}
Translation: <BOS><EOS>
------------ Translation ------------
Input: красавицы, воздушные, как облако, созданные волшебством ваших гениальных поэтов, посещали меня ночью и шептали мне чудные сказки, от которых пьянела моя голова.
Output weights: 
0 {'<EOS>': 0.1519080400466919, '.': 0.0209706611931324, '▁the': 0.017373694106936455}
Translation: <BOS><EOS>
------------ Epoch 9; lr: 0.00857 ------------


Train Epoch #9 - loss:   7.231: 100%|██████████| 414/414 [00:37<00:00, 11.15it/s]
Val Epoch #9 - loss:   8.428:  11%|█▏        | 6/53 [00:00<00:01, 32.63it/s]

Train Loss: 7.236 -> 7.231


Val Epoch #9 - loss:   8.071: 100%|██████████| 53/53 [00:01<00:00, 35.86it/s]


Val Loss: 8.051 -> 8.071
Saving model...


Train Epoch #10 - loss:   7.999:   0%|          | 1/414 [00:00<00:46,  8.80it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.1872425377368927, '▁the': 0.017441608011722565, '.': 0.016163073480129242}
Translation: <BOS><EOS>
------------ Translation ------------
Input: стоимость ремонтной мастерской была больше чем на одну треть выплачена.
Output weights: 
0 {'<EOS>': 0.1872425377368927, '▁the': 0.017441608011722565, '.': 0.016163073480129242}
Translation: <BOS><EOS>
------------ Epoch 10; lr: 0.00857 ------------


Train Epoch #10 - loss:   7.233: 100%|██████████| 414/414 [00:37<00:00, 10.94it/s]
Val Epoch #10 - loss:   8.472:  11%|█▏        | 6/53 [00:00<00:01, 32.92it/s]

Train Loss: 7.231 -> 7.233


Val Epoch #10 - loss:   8.137: 100%|██████████| 53/53 [00:01<00:00, 36.74it/s]


Val Loss: 8.071 -> 8.137
Saving model...


Train Epoch #11 - loss:   8.014:   0%|          | 1/414 [00:00<00:45,  9.15it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.08475016802549362, '▁the': 0.012208942323923111, '.': 0.010455865412950516}
Translation: <BOS><EOS>
------------ Translation ------------
Input: ниже представлен скриншот admsnmp предположительных community строк.
Output weights: 
0 {'<EOS>': 0.08475016802549362, '▁the': 0.012208942323923111, '.': 0.010455865412950516}
Translation: <BOS><EOS>
------------ Epoch 11; lr: 0.00857 ------------


Train Epoch #11 - loss:   7.233: 100%|██████████| 414/414 [00:38<00:00, 10.84it/s]
Val Epoch #11 - loss:   8.346:  11%|█▏        | 6/53 [00:00<00:01, 31.25it/s]

Train Loss: 7.233 -> 7.233


Val Epoch #11 - loss:   7.995: 100%|██████████| 53/53 [00:01<00:00, 33.57it/s]


Val Loss: 8.137 -> 7.995
Saving model...


Train Epoch #12 - loss:   8.001:   0%|          | 1/414 [00:00<00:52,  7.84it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.19761726260185242, '▁the': 0.017978500574827194, '.': 0.012456007301807404}
Translation: <BOS><EOS>
------------ Translation ------------
Input: он верен своим долгосрочным заказчикам и делает все возможное для облегчения работы наших менеджеров.
Output weights: 
0 {'<EOS>': 0.19761726260185242, '▁the': 0.017978500574827194, '.': 0.012456007301807404}
Translation: <BOS><EOS>
------------ Epoch 12; lr: 0.00815 ------------


Train Epoch #12 - loss:   7.234: 100%|██████████| 414/414 [00:38<00:00, 10.85it/s]
Val Epoch #12 - loss:   8.499:  11%|█▏        | 6/53 [00:00<00:01, 32.74it/s]

Train Loss: 7.233 -> 7.234


Val Epoch #12 - loss:   8.129: 100%|██████████| 53/53 [00:01<00:00, 36.00it/s]


Val Loss: 7.995 -> 8.129
Saving model...


Train Epoch #13 - loss:   8.066:   0%|          | 1/414 [00:00<00:45,  9.05it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.2167101949453354, '▁the': 0.019501354545354843, '▁a': 0.015288935042917728}
Translation: <BOS><EOS>
------------ Translation ------------
Input: красавицы, воздушные, как облако, созданные волшебством ваших гениальных поэтов, посещали меня ночью и шептали мне чудные сказки, от которых пьянела моя голова.
Output weights: 
0 {'<EOS>': 0.2167101949453354, '▁the': 0.019501354545354843, '▁a': 0.015288935042917728}
Translation: <BOS><EOS>
------------ Epoch 13; lr: 0.00815 ------------


Train Epoch #13 - loss:   7.239: 100%|██████████| 414/414 [00:37<00:00, 10.92it/s]
Val Epoch #13 - loss:   8.423:   9%|▉         | 5/53 [00:00<00:01, 28.09it/s]

Train Loss: 7.234 -> 7.239


Val Epoch #13 - loss:   8.061: 100%|██████████| 53/53 [00:01<00:00, 33.95it/s]


Val Loss: 8.129 -> 8.061
Saving model...


Train Epoch #14 - loss:   7.994:   0%|          | 1/414 [00:00<00:46,  8.80it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.2219264805316925, '▁the': 0.020117633044719696, '.': 0.016167158260941505}
Translation: <BOS><EOS>
------------ Translation ------------
Input: этот может быть методом, не идеально приспособленным для оценки тепловых эксплуатационных характеристик тонкого tic, поскольку у него толщина всего примерно от одной восьмой до одной четвертой дюйма, и она размещена между пластинами.
Output weights: 
0 {'<EOS>': 0.2219265252351761, '▁the': 0.02011762745678425, '.': 0.016167152673006058}
Translation: <BOS><EOS>
------------ Epoch 14; lr: 0.00815 ------------


Train Epoch #14 - loss:   7.247: 100%|██████████| 414/414 [00:37<00:00, 11.07it/s]
Val Epoch #14 - loss:   8.437:   9%|▉         | 5/53 [00:00<00:01, 26.87it/s]

Train Loss: 7.239 -> 7.247


Val Epoch #14 - loss:   8.064: 100%|██████████| 53/53 [00:01<00:00, 31.92it/s]


Val Loss: 8.061 -> 8.064
Saving model...


Train Epoch #15 - loss:    7.99:   0%|          | 1/414 [00:00<00:44,  9.29it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.2208549529314041, '▁the': 0.02134057506918907, '▁a': 0.015993069857358932}
Translation: <BOS><EOS>
------------ Translation ------------
Input: представьте себе трудности когда tag library или встроенный код генерируют данные в таблице или другие детали форматирования в странице.
Output weights: 
0 {'<EOS>': 0.2208549529314041, '▁the': 0.02134057506918907, '▁a': 0.015993064269423485}
Translation: <BOS><EOS>
------------ Epoch 15; lr: 0.00774 ------------


Train Epoch #15 - loss:   7.244: 100%|██████████| 414/414 [00:37<00:00, 10.98it/s]
Val Epoch #15 - loss:   8.521:  11%|█▏        | 6/53 [00:00<00:01, 32.20it/s]

Train Loss: 7.247 -> 7.244


Val Epoch #15 - loss:   8.126: 100%|██████████| 53/53 [00:01<00:00, 34.65it/s]


Val Loss: 8.064 -> 8.126
Saving model...


Train Epoch #16 - loss:   8.095:   0%|          | 1/414 [00:00<00:55,  7.51it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.25951531529426575, '▁the': 0.022733211517333984, '.': 0.020950334146618843}
Translation: <BOS><EOS>
------------ Translation ------------
Input: 1.7.5 член программы несет ответственность за все претензии, предъявленные третьими сторонами, в том числе налоговыми органами, относящимися к участию в программе.
Output weights: 
0 {'<EOS>': 0.25951531529426575, '▁the': 0.022733205929398537, '.': 0.020950334146618843}
Translation: <BOS><EOS>
------------ Epoch 16; lr: 0.00774 ------------


Train Epoch #16 - loss:    7.24: 100%|██████████| 414/414 [00:37<00:00, 10.92it/s]
Val Epoch #16 - loss:   8.481:   9%|▉         | 5/53 [00:00<00:01, 31.44it/s]

Train Loss: 7.244 -> 7.24


Val Epoch #16 - loss:   8.103: 100%|██████████| 53/53 [00:01<00:00, 33.32it/s]


Val Loss: 8.126 -> 8.103
Saving model...


Train Epoch #17 - loss:   8.129:   0%|          | 1/414 [00:00<00:45,  9.13it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.22968436777591705, '▁the': 0.02050005830824375, '.': 0.01292781624943018}
Translation: <BOS><EOS>
------------ Translation ------------
Input: этот подход следует использовать, если вы заинтересованы в минимизации раходов альянса и предупреждении opportunistic/ авантюрных действий вашего партнера.
Output weights: 
0 {'<EOS>': 0.22968439757823944, '▁the': 0.02050006203353405, '.': 0.012927817180752754}
Translation: <BOS><EOS>
------------ Epoch 17; lr: 0.00774 ------------


Train Epoch #17 - loss:   7.248: 100%|██████████| 414/414 [00:37<00:00, 10.98it/s]
Val Epoch #17 - loss:    8.55:  11%|█▏        | 6/53 [00:00<00:01, 33.09it/s]

Train Loss: 7.24 -> 7.248


Val Epoch #17 - loss:   8.158: 100%|██████████| 53/53 [00:01<00:00, 35.78it/s]


Val Loss: 8.103 -> 8.158
Saving model...


Train Epoch #18 - loss:   8.164:   0%|          | 1/414 [00:00<00:45,  9.11it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.23147611320018768, '▁the': 0.021437887102365494, '.': 0.015200959518551826}
Translation: <BOS><EOS>
------------ Translation ------------
Input: примеры - текущая дата или поля, требующие ввода произвольной информации.
Output weights: 
0 {'<EOS>': 0.23147611320018768, '▁the': 0.021437887102365494, '.': 0.015200959518551826}
Translation: <BOS><EOS>
------------ Epoch 18; lr: 0.00735 ------------


Train Epoch #18 - loss:   7.251: 100%|██████████| 414/414 [00:38<00:00, 10.88it/s]
Val Epoch #18 - loss:   8.494:  11%|█▏        | 6/53 [00:00<00:01, 32.75it/s]

Train Loss: 7.248 -> 7.251


Val Epoch #18 - loss:   8.122: 100%|██████████| 53/53 [00:01<00:00, 35.46it/s]


Val Loss: 8.158 -> 8.122
Saving model...


Train Epoch #19 - loss:   8.127:   0%|          | 1/414 [00:00<00:46,  8.90it/s]

Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.2334105521440506, '▁the': 0.019432591274380684, '.': 0.015165431424975395}
Translation: <BOS><EOS>
------------ Translation ------------
Input: это была первая в xxi веке кавалерийская атака сша.
Output weights: 
0 {'<EOS>': 0.2334105521440506, '▁the': 0.019432591274380684, '.': 0.015165431424975395}
Translation: <BOS><EOS>
------------ Epoch 19; lr: 0.00735 ------------


Train Epoch #19 - loss:    7.24: 100%|██████████| 414/414 [00:37<00:00, 10.96it/s]
Val Epoch #19 - loss:   8.423:  11%|█▏        | 6/53 [00:00<00:01, 32.40it/s]

Train Loss: 7.251 -> 7.24


Val Epoch #19 - loss:   8.052: 100%|██████████| 53/53 [00:01<00:00, 35.05it/s]


Val Loss: 8.122 -> 8.052
Saving model...
Saved successfully
------------ Translation ------------
Input: обучение машинное, а учимся мы.
Output weights: 
0 {'<EOS>': 0.21475674211978912, '▁the': 0.02324020117521286, '.': 0.016531523317098618}
Translation: <BOS><EOS>
------------ Translation ------------
Input: наша традиция состоит в том, что я показываю дочери запечатанный конверт и читаю надпись, которая гласит, что она сможет прочесть письмо, когда ей исполнится 21 год.
Output weights: 
0 {'<EOS>': 0.21475674211978912, '▁the': 0.02324020117521286, '.': 0.016531523317098618}
Translation: <BOS><EOS>


In [13]:
# Calculate result score.
calc_BLEU(model, test_data)

Test BLEU score: 100%|██████████| 850/850 [00:18<00:00, 46.29it/s]


Test BLEU score - 0.0000


(0,
 [['Regarding the meeting on Saturday, the meeting organised by the European Union members of the G8 - Britain, Italy, France and Germany - we had a similar meeting if you recall in London in the spring, I think it was.\n'],
  ['It may seem that we are wasting words or talking circuitously about this issue, but it is rather important since the Service to Others path is that which is required for an entity to become harvestable.\n'],
  ['Some of the most common cancer types such as breast cancer, cervical cancer and colorectal cancer have a high cure rate when detected early and treated according to best evidence.\n'],
  ['The court pointed out that strong doubt exists whether a California court would use compulsory rules of German legislation regarding compensation since according to the contract, parties submitted their rights to the California law.\n'],
  ['The State party should also guarantee the right of such families to seek redress or be fairly and adequately compensated, in