In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import torchtext
from torchtext.legacy.data import Field, BucketIterator

import spacy
from tqdm.notebook import tqdm
import tqdm
import random
import math
import time
import numpy as np

import matplotlib
matplotlib.rcParams.update({'figure.figsize': (16, 12), 'font.size': 14})
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import clear_output

from nltk.tokenize import WordPunctTokenizer
from torch.nn import functional as F

In [2]:
SEED = 666

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [4]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [5]:
!ls '/content/drive/MyDrive/Colab Notebooks/NLP'

 attention_LSTM_bleu.pt  'Models Notebooks'
 attention_LSTM_last.pt   new_data_small.txt
 attention_LSTM.pt	  new_data.txt
 best_model.pt		 'NLP models'
 de-en.tgz		  rus-eng.zip
 de-en.zip		  simle_LSTM_last_en_ru.pt
 europarl-v7.de-en.de	  simple_LSTM_bleu_en_ru.pt
 europarl-v7.de-en.en	  simple_LSTM_en_ru.pt
 fra-eng.zip		  Translation_Project_attention_GRU.ipynb
 GRU_model2.pt		  Translation_Project_attention_LSTM_ru_en.ipynb
 GRU_model.pt		  Translation_Project_simple_LSTM_en_ru.ipynb


In [6]:
!wget https://drive.google.com/uc?id=1NWYqJgeG_4883LINdEjKUr6nLQPY6Yb_ -O data.txt

--2021-07-16 12:19:24--  https://drive.google.com/uc?id=1NWYqJgeG_4883LINdEjKUr6nLQPY6Yb_
Resolving drive.google.com (drive.google.com)... 74.125.197.102, 74.125.197.113, 74.125.197.100, ...
Connecting to drive.google.com (drive.google.com)|74.125.197.102|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-14-00-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/khr2159fqseubpubhbfqu1keg279fnc8/1626437925000/16549096980415837553/*/1NWYqJgeG_4883LINdEjKUr6nLQPY6Yb_ [following]
--2021-07-16 12:19:25--  https://doc-14-00-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/khr2159fqseubpubhbfqu1keg279fnc8/1626437925000/16549096980415837553/*/1NWYqJgeG_4883LINdEjKUr6nLQPY6Yb_
Resolving doc-14-00-docs.googleusercontent.com (doc-14-00-docs.googleusercontent.com)... 74.125.20.132, 2607:f8b0:400e:c07::84
Connecting to doc-14-00-docs.googleusercontent.com (doc-14-00-docs.googleusercontent.com)|74.125.

In [7]:
tokenizer_W = WordPunctTokenizer()

def tokenize_ru(x, tokenizer=WordPunctTokenizer()):
    return tokenizer.tokenize(x.lower())

def tokenize_en(x, tokenizer=WordPunctTokenizer()):
    return tokenizer.tokenize(x.lower())

In [8]:
SRC = Field(tokenize=tokenize_ru,
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True)

TRG = Field(tokenize=tokenize_en,
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True)


dataset = torchtext.legacy.data.TabularDataset(
    path='data.txt',
    format='tsv',
    fields=[('trg', TRG), ('src', SRC)]
)

In [9]:
print(len(dataset.examples))
print(dataset.examples[0].src)
print(dataset.examples[0].trg)

50000
['отель', 'cordelia', 'расположен', 'в', 'тбилиси', ',', 'в', '3', 'минутах', 'ходьбы', 'от', 'свято', '-', 'троицкого', 'собора', '.']
['cordelia', 'hotel', 'is', 'situated', 'in', 'tbilisi', ',', 'a', '3', '-', 'minute', 'walk', 'away', 'from', 'saint', 'trinity', 'church', '.']


In [10]:
train_data, valid_data, test_data = dataset.split(split_ratio=[0.8, 0.15, 0.05])

print(f"Number of training examples: {len(train_data.examples)}")
print(f"Number of validation examples: {len(valid_data.examples)}")
print(f"Number of testing examples: {len(test_data.examples)}")

Number of training examples: 40000
Number of validation examples: 2500
Number of testing examples: 7500


In [11]:
SRC.build_vocab(dataset, min_freq = 2)
TRG.build_vocab(dataset, min_freq = 2)

In [12]:
print(f"Unique tokens in source (ru) vocabulary: {len(SRC.vocab)}")
print(f"Unique tokens in target (en) vocabulary: {len(TRG.vocab)}")

Unique tokens in source (ru) vocabulary: 16483
Unique tokens in target (en) vocabulary: 11778


In [13]:
print(vars(train_data.examples[9]))

{'trg': ['there', 'is', 'a', 'concierge', 'service', 'and', '24', '-', 'hour', 'front', 'desk', '.'], 'src': ['гостям', 'предоставляются', 'услуги', 'консьержа', 'и', 'круглосуточной', 'стойки', 'регистрации', '.']}


In [14]:
def _len_sort_key(x):
    return len(x.src)

BATCH_SIZE = 64

train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE, 
    device = device,
    sort_key=_len_sort_key
)

In [15]:
class Encoder_LSTM(nn.Module):
    def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout):
        super().__init__()
        
        self.embedding = nn.Embedding(input_dim, emb_dim)
        
        self.LSTM = nn.LSTM(emb_dim, enc_hid_dim, bidirectional = True, num_layers=2)

        self.fc = nn.Linear(enc_hid_dim * 2, dec_hid_dim)
        
        self.fc_2 = nn.Linear(enc_hid_dim * 2, dec_hid_dim)

        self.dropout = nn.Dropout(dropout)
        
    def forward(self, src):
        
        #src = [src len, batch size]
        embedded = self.dropout(self.embedding(src))
        #embedded = [src len, batch size, emb dim]
        
        outputs, (hidden, cell) = self.LSTM(embedded)
        #outputs = [src len, batch size, hid dim * num directions]
        #hidden = [n layers * num directions, batch size, hid dim]
        #cell = [n layers, batch size, hid dim]

        #hidden is stacked [forward_1, backward_1, forward_2, backward_2, ...]
        #outputs are always from the last layer
        #hidden [-2, :, : ] is the last of the forwards RNN 
        #hidden [-1, :, : ] is the last of the backwards RNN
        
        hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)))
        cell = torch.tanh(self.fc_2(torch.cat((cell[-2,:,:], cell[-1,:,:]), dim=1)))
        #outputs = [src len, batch size, enc hid dim * 2]
        #hidden = [batch size, dec hid dim]
        #cell = [n layers, batch size, hid dim]
        return outputs, hidden, cell

In [16]:
def temp_softmax(x, dim=0, temperature=1):
    e_x = torch.exp(x / temperature)
    return e_x/torch.sum(e_x, dim=dim)

In [27]:
class Attention_LSTM(nn.Module):
    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()
        
        self.attn = nn.Linear((enc_hid_dim * 2) + dec_hid_dim, dec_hid_dim)
        self.v = nn.Linear(dec_hid_dim, 1, bias = False)
        
    def forward(self, hidden, encoder_outputs):
        
        #hidden = [batch size, dec hid dim]
        #encoder_outputs = [src len, batch size, enc hid dim * 2]
        
        batch_size = encoder_outputs.shape[1]
        src_len = encoder_outputs.shape[0]
        
        hidden = hidden.unsqueeze(1)
        #hidden = [batch size, 1, dec hid dim]

        hidden = hidden.repeat(1, src_len, 1)
        #hidden = [batch size, src len, dec hid dim]
        
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        #encoder_outputs = [batch size, src len, enc hid dim * 2]

        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim = 2))) 
        #energy = [batch size, src len, dec hid dim]

        attention = self.v(energy).squeeze(2)
        #attention = [batch size, src len]
        
        return F.softmax(attention, dim=1)#temp_softmax(attention, dim=1)

In [28]:
class Decoder_LSTM(nn.Module):
    def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout, attention):
        super().__init__()

        self.output_dim = output_dim
        self.attention = attention
        
        self.embedding = nn.Embedding(output_dim, emb_dim)
        
        self.LSTM = nn.LSTM((enc_hid_dim * 2) + emb_dim, dec_hid_dim)

        self.fc_out = nn.Linear((enc_hid_dim * 2) + dec_hid_dim + emb_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input, hidden, cell, encoder_outputs):
             
        #input = [batch size]
        #hidden = [batch size, dec hid dim]
        #encoder_outputs = [src len, batch size, enc hid dim * 2]
        
        input = input.unsqueeze(0)
        #input = [1, batch size]
        
        embedded = self.dropout(self.embedding(input))
        #embedded = [1, batch size, emb dim]
        
        a = self.attention(hidden, encoder_outputs)
        #a = [batch size, src len]
        a = a.unsqueeze(1)
        #a = [batch size, 1, src len]
        
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        #encoder_outputs = [batch size, src len, enc hid dim * 2]
        
        weighted = torch.bmm(a, encoder_outputs)
        #weighted = [batch size, 1, enc hid dim * 2]
        weighted = weighted.permute(1, 0, 2)
        #weighted = [1, batch size, enc hid dim * 2]
        
        rnn_input = torch.cat((embedded, weighted), dim = 2)
        #rnn_input = [1, batch size, (enc hid dim * 2) + emb dim]
        hidden = hidden.unsqueeze(0)
        #hidden = [1, batch size, dec hid dim]
        cell = cell.unsqueeze(0)
        #cell = [1, batch size, dec hid dim]

        output, (hidden, cell) = self.LSTM(rnn_input, (hidden, cell))
        #output = [seq len, batch size, dec hid dim * n directions]
        #hidden = [n layers * n directions, batch size, dec hid dim]
        #cell = [n layers * n directions, batch size, dec hid dim]

        #seq len, n layers and n directions will be 1
        #output = [1, batch size, dec hid dim]
        #hidden = [1, batch size, dec hid dim]
        #cell = [1, batch size, dec hid dim]

        embedded = embedded.squeeze(0)
        #embedded = [batch size, emb dim]
        output = output.squeeze(0)
        #output = [batch size, dec hid dim]
        weighted = weighted.squeeze(0)
        #weighted = [batch size, enc hid dim * 2]
        hidden = hidden.squeeze(0)
        #hidden = [batch size, dec hid dim]
        cell = cell.squeeze(0)
        #cell = [batch size, dec hid dim]

        prediction = self.fc_out(torch.cat((output, weighted, embedded), dim = 1))
        #prediction = [batch size, output dim]
        
        return prediction, hidden, cell

In [29]:
class Translator_LSTM(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        
    def forward(self, src, trg, teacher_forcing):
        
        #src = [src len, batch size]
        #trg = [trg len, batch size]
        
        batch_size = src.shape[1]
        trg_len = trg.shape[0]
        trg_vocab_size = self.decoder.output_dim
        
        #tensor to store decoder outputs
        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)
        
        #encoder_outputs is all hidden states of the input sequence, back and forwards
        #hidden is the final forward and backward hidden states, passed through a linear layer
        encoder_outputs, hidden, cell = self.encoder(src)
                
        #first input to the decoder is the <sos> tokens
        input = trg[0,:]
        
        for t in range(1, trg_len):
            
            #insert input token embedding, previous hidden state and all encoder hidden states
            #receive output tensor (predictions) and new hidden state
            output, hidden, cell = self.decoder(input, hidden, cell, encoder_outputs)
            
            #place predictions in a tensor holding predictions for each token
            outputs[t] = output
            
            #decide if we are going to use teacher forcing or not
            teacher_force = random.random() < teacher_forcing
            
            #get the highest predicted token from our predictions
            top1 = output.argmax(1) 
            
            #if teacher forcing, use actual next token as next input
            #if not, use predicted token
            input = trg[t] if teacher_force else top1

        return outputs

In [37]:
input_dim = len(SRC.vocab)
output_dim = len(TRG.vocab)
encoder_embedding_dim = 256
decoder_embedding_dim = 256
encoder_hidden_dim = 512
decoder_hidden_dim = 512
encoder_dropout_prob = 0.5
decoder_dropout_prob = 0.5

attention = Attention_LSTM(encoder_hidden_dim, decoder_hidden_dim)
encoder = Encoder_LSTM(input_dim, encoder_embedding_dim, encoder_hidden_dim, 
                      decoder_hidden_dim, encoder_dropout_prob)
decoder = Decoder_LSTM(output_dim, decoder_embedding_dim, encoder_hidden_dim, 
                      decoder_hidden_dim, decoder_dropout_prob, attention)

model = Translator_LSTM(encoder, decoder, device).to(device)

In [38]:
def init_weights(m):
    for name, param in m.named_parameters():
        if 'weight' in name:
            nn.init.normal_(param.data, mean=0, std=0.01)
        else:
            nn.init.constant_(param.data, 0)
            
model.apply(init_weights)

Translator_LSTM(
  (encoder): Encoder_LSTM(
    (embedding): Embedding(16483, 256)
    (LSTM): LSTM(256, 512, num_layers=2, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=512, bias=True)
    (fc_2): Linear(in_features=1024, out_features=512, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder_LSTM(
    (attention): Attention_LSTM(
      (attn): Linear(in_features=1536, out_features=512, bias=True)
      (v): Linear(in_features=512, out_features=1, bias=False)
    )
    (embedding): Embedding(11778, 256)
    (LSTM): LSTM(1280, 512)
    (fc_out): Linear(in_features=1792, out_features=11778, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [39]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 43,317,506 trainable parameters


In [40]:
def delete_eos(tokens_iter):
    for token in tokens_iter:
        if token == '<eos>':
            break
        yield token

def remove_tech_tokens(tokens_iter, tokens_to_remove=['<sos>', '<unk>', '<pad>']):
    return [x for x in tokens_iter if x not in tokens_to_remove]

def generate_translation(src, trg, model, TRG_vocab):
    model.eval()
    # запускаем без teacher_forcing
    output = model(src, trg, 0)
    # удаляем первый токен и выбираем лучшее слово
    output = output[1:].argmax(-1)
    #print(output)
    original = remove_tech_tokens(delete_eos([TRG_vocab.itos[x] for x in list(trg[:,0].cpu().numpy())]))
    generated = remove_tech_tokens(delete_eos([TRG_vocab.itos[x] for x in list(output[:, 0].cpu().numpy())]))
    
    print('Правильный перевод: {}'.format(' '.join(original)))
    print('Перевод модели: {}'.format(' '.join(generated)))

def get_text(x, TRG_vocab):
     generated = remove_tech_tokens(delete_eos([TRG_vocab.itos[elem] for elem in list(x)]))
     return generated

from nltk.translate.bleu_score import corpus_bleu

def get_bleu(iterator):
    original_text = []
    generated_text = []
    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            src = batch.src
            trg = batch.trg
            # запускаем без teacher_forcing
            output = model(src, trg, 0)
            # удаляем первый токен и выбираем лучшее слово
            output = output[1:].argmax(-1)
            # собираем данные для подсчета BLEU
            original_text.extend([get_text(x, TRG.vocab) for x in trg.cpu().numpy().T])
            generated_text.extend([get_text(x, TRG.vocab) for x in output.detach().cpu().numpy().T])
    bleu = corpus_bleu([[text] for text in original_text], generated_text) * 100
    return bleu

In [41]:
optimizer = optim.Adam(model.parameters())
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

In [42]:
def train(model, iterator, optimizer, criterion, clip, epoch):
    
    model.train()
    
    epoch_loss = 0
    
    for i, batch in enumerate(iterator):
        
        src = batch.src
        trg = batch.trg
        
        optimizer.zero_grad()
        teacher_forcing = 1 - epoch * 0.25
        if teacher_forcing < 0.6:
            teacher_forcing = 0.6
        output = model(src, trg, teacher_forcing = teacher_forcing)
        
        #trg = [trg len, batch size]
        #output = [trg len, batch size, output dim]
        
        output_dim = output.shape[-1]
        
        output = output[1:].view(-1, output_dim)
        trg = trg[1:].view(-1)
        
        #trg = [(trg len - 1) * batch size]
        #output = [(trg len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):

            src = batch.src
            trg = batch.trg

            output = model(src, trg, 0) #turn off teacher forcing

            #trg = [trg len, batch size]
            #output = [trg len, batch size, output dim]

            output_dim = output.shape[-1]
            
            output = output[1:].view(-1, output_dim)
            trg = trg[1:].view(-1)

            #trg = [(trg len - 1) * batch size]
            #output = [(trg len - 1) * batch size, output dim]

            loss = criterion(output, trg)

            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [43]:
epochs = 10
clip = 1

best_valid_loss = float('inf')
best_valid_bleu = 0
for epoch in tqdm.notebook.tqdm(range(epochs)):
    
    start_time = time.time()
    
    train_loss = train(model, train_iterator, optimizer, criterion, clip, epoch)
    valid_loss = evaluate(model, valid_iterator, criterion)
    valid_bleu = get_bleu(test_iterator)
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), '/content/drive/MyDrive/Colab Notebooks/NLP/attention_LSTM_ru_en.pt')
    if valid_bleu > best_valid_bleu:
        best_valid_bleu= valid_bleu
        torch.save(model.state_dict(), '/content/drive/MyDrive/Colab Notebooks/NLP/attention_LSTM_bleu_ru_en.pt')
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')
    print(f'\t Val. BLEU: {valid_bleu:.3f}')

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Epoch: 01 | Time: 6m 34s
	Train Loss: 3.718 | Train PPL:  41.202
	 Val. Loss: 7.705 |  Val. PPL: 2218.313
	 Val. BLEU: 9.794
Epoch: 02 | Time: 6m 36s
	Train Loss: 3.000 | Train PPL:  20.077
	 Val. Loss: 5.538 |  Val. PPL: 254.226
	 Val. BLEU: 19.404
Epoch: 03 | Time: 6m 35s
	Train Loss: 2.758 | Train PPL:  15.768
	 Val. Loss: 4.810 |  Val. PPL: 122.721
	 Val. BLEU: 24.791
Epoch: 04 | Time: 6m 35s
	Train Loss: 2.406 | Train PPL:  11.086
	 Val. Loss: 4.781 |  Val. PPL: 119.279
	 Val. BLEU: 28.016
Epoch: 05 | Time: 6m 35s
	Train Loss: 2.134 | Train PPL:   8.444
	 Val. Loss: 4.757 |  Val. PPL: 116.363
	 Val. BLEU: 29.237
Epoch: 06 | Time: 6m 36s
	Train Loss: 1.876 | Train PPL:   6.526
	 Val. Loss: 4.875 |  Val. PPL: 130.946
	 Val. BLEU: 29.942
Epoch: 07 | Time: 6m 35s
	Train Loss: 1.682 | Train PPL:   5.376
	 Val. Loss: 4.843 |  Val. PPL: 126.863
	 Val. BLEU: 30.190
Epoch: 08 | Time: 6m 32s
	Train Loss: 1.544 | Train PPL:   4.685
	 Val. Loss: 4.988 |  Val. PPL: 146.679
	 Val. BLEU: 30.727


In [44]:
torch.save(model.state_dict(), '/content/drive/MyDrive/Colab Notebooks/NLP/attention_LSTM_last_ru_en.pt')

In [45]:
model_name = 'attention_LSTM_bleu_ru_en.pt'
model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/NLP/' + model_name))

<All keys matched successfully>

In [46]:
def translate_batch(iterator):
    batch = next(iter(iterator))
    for idx in range(10):
        src = batch.src[:, idx:idx+1]
        trg = batch.trg[:, idx:idx+1]
        generate_translation(src, trg, model, TRG.vocab)

In [47]:
translate_batch(test_iterator)

Правильный перевод: laundry facilities are on site .
Перевод модели: laundry facilities are available on site .
Правильный перевод: guests can enjoy the on - site restaurant .
Перевод модели: guests can enjoy the on - site restaurant .
Правильный перевод: there is equipped fully equipped kitchenette with fridge and the bathroom comes with a shower .
Перевод модели: the bathroom comes with a shower .
Правильный перевод: free wifi access is available .
Перевод модели: free wifi access is available .
Правильный перевод: there is a sandbox in the yard .
Перевод модели: there is a a corner corner .
Правильный перевод: free private parking is available on site .
Перевод модели: free private parking is available .
Правильный перевод: some rooms have a balcony .
Перевод модели: some rooms have a balcony .
Правильный перевод: private parking is available on site free of charge .
Перевод модели: free private parking is available .
Правильный перевод: free parking is available on site .
Перевод м

In [48]:
print("Train BLEU = ",get_bleu(train_iterator))
print("Valid BLEU = ",get_bleu(valid_iterator))
print("Test BLEU = ",get_bleu(test_iterator))

Train BLEU =  48.9565802478055
Valid BLEU =  29.793645345919405
Test BLEU =  30.904735674632462


In [49]:
def translate(data):
    with open('example.txt', 'w') as file:
        file.write(str(data*2+','+data))
    test_dataset = torchtext.legacy.data.TabularDataset(
        path='example.txt',
        format='csv',
        fields=[('trg', TRG), ('src', SRC)]
    )
    iterator = BucketIterator(
        test_dataset, 
        batch_size = 1, 
        device = device,
        sort_key=_len_sort_key
    )
    generated_text = []
    model.eval()
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            src = batch.src
            trg = batch.trg
            # запускаем без teacher_forcing
            output = model(src, trg, 0)
            # удаляем первый токен и выбираем лучшее слово
            output = output[1:].argmax(-1)
            # собираем данные для подсчета BLEU
            generated_text.extend([get_text(x, TRG.vocab) for x in output.detach().cpu().numpy().T])
            generated_text=(' '.join(generated_text[0])[:-2]+'.').capitalize()
    translation = 'Перевод модели: {}'.format(generated_text)
    return translation

In [50]:
data='На всей территории гостевого дома Jam работает бесплатный Wi-Fi.'
translate(data)

'Перевод модели: Ethos guesthouse features free wifi throughout the property.'