In [28]:
! pip3 install --user torch torchvision

Collecting torch
  Downloading https://files.pythonhosted.org/packages/e8/c5/0763a145e051ce7c84c128621693d1c5dfad5a42d551e8d79742261002e2/torch-0.3.1-cp35-cp35m-manylinux1_x86_64.whl (496.4MB)
[K    100% |████████████████████████████████| 496.4MB 2.3kB/s eta 0:00:01  7% |██▌                             | 38.6MB 35.4MB/s eta 0:00:13    10% |███▍                            | 52.1MB 39.0MB/s eta 0:00:12    13% |████▍                           | 68.3MB 26.1MB/s eta 0:00:17    14% |████▊                           | 73.2MB 26.8MB/s eta 0:00:16    15% |█████                           | 76.5MB 37.9MB/s eta 0:00:12    19% |██████▏                         | 95.2MB 40.8MB/s eta 0:00:10    20% |██████▍                         | 99.5MB 33.0MB/s eta 0:00:13    24% |████████                        | 122.5MB 21.3MB/s eta 0:00:18    27% |████████▊                       | 135.9MB 42.2MB/s eta 0:00:09    31% |██████████                      | 155.7MB 44.5MB/s eta 0:00:08    33% |██████████▉             

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.optim as optim
import itertools

In [2]:
TRAIN_FILE = 'data/ru-be-train.txt'
TEST_FILE = 'data/ru-be-test.txt'

In [81]:
MAX_LENGTH = 30
 
class Alphabet:
    START = '__START__'
    END = '_END_'
 
    def __init__(self, max_length=MAX_LENGTH):
        """Initialize the class which works with letter and index representations of sequences.
        Parameters
        ----------
        max_length : int
            The largest permitted length for sequence. Longer sequences are cropped.
        """
        self.max_length = max_length
        self.letter2index_ = {Alphabet.START : 0, Alphabet.END : 1}
        self.index2letter_ = [Alphabet.START, Alphabet.END]
        
    def get_index(self, letter):
        if letter not in self.letter2index_:
            self.letter2index_[letter] = len(self.index2letter_)
            self.index2letter_.append(letter)
        return self.letter2index_[letter]
    
    @property
    def start_index(self):
        return self.letter2index_[Alphabet.START]
    
    @property
    def end_index(self):
        return self.letter2index_[Alphabet.END]
    
    def index2letter(self, x):
        result = []
        for index in x:
            result.append(self.index2letter_[index])
            if index == self.end_index:
                break
        return ''.join(result)
    
    def letter2index(self, word):
        lst = [self.get_index(letter) for letter in word]
        return lst[:self.max_length - 1] + [self.get_index(Alphabet.END)] * max(1, self.max_length - len(lst))
    
    def __len__(self):
        return len(self.index2letter_)
    
    # torch utils
    def get_length(self, input_sequence):
        """Infers the lengths of the sequences in batch
        
        input_sequence: Tensor NxT
        
        returs: Tensor N
        """
        return (input_sequence == self.end_index).max(dim=1)[1] + 1
    
    def get_mask(self, input_sequence):
        """Infers the mask of the sequences in batch
        
        input_sequence: Tensor NxT
        
        returns: Tensor NxT contained 0s and 1s.
        """
        return (torch.cumsum(input_sequence == self.end_index, dim=1) < 2).float()
    
    def get_one_hot_repr(self, input_sequence):
        """Produces one_hot representation from label representation/
        
        input_sequence: LongTensor NxT
        
        returns: FloatTensor NxTxH
        """
        
        onehot = torch.FloatTensor(*input_sequence.shape, len(self)).zero_()
        onehot.scatter_(2, input_sequence.unsqueeze(2), 1.)
        
        return onehot 

In [82]:
ru = Alphabet()
be = Alphabet()

In [83]:
def load_pair_dataset(filename, alph1, alph2):
    x, y = [], []
    with open(filename, 'r') as ftr:
        for line in ftr:
            try:
                word1, word2 = line.split()
            except ValueError:
                continue
            x.append(alph1.letter2index(word1))
            y.append(alph2.letter2index(word2))
    return np.array(x), np.array(y)       

In [84]:
X, Y = load_pair_dataset(TRAIN_FILE, ru, be)

In [None]:
onehot = torch.FloatTensor(2, 2, 35)

In [None]:
indx = torch.LongTensor([[1, 4], [4, 1]])

In [None]:
onehot.scatter_

In [None]:
from sklearn.model_selection import train_test_split

train_X, val_X, train_Y, val_Y = train_test_split(X, Y, test_size=0.1, random_state=42)

In [None]:
train_X.dtype

In [None]:
def seq2seq_softmax_with_mask(entries, mask):
    entries = entries[:,:,0]
    maxs = entries.max(1, keepdim=True)[0]
    #print(entries.shape, maxs.shape, mask.shape)
    entries = torch.exp(entries - maxs) * mask
    return entries / (entries.sum(dim=1, keepdim=True) + 1e-15)


class MultiplicativeAttentionWithMask(nn.Module):
    def __init__(self, input_size, output_size):
        super(MultiplicativeAttentionWithMask, self).__init__()
        self.encoder_linear = nn.Linear(input_size, output_size)
        self.decoder_linear = nn.Linear(input_size, output_size)
        
    def forward(self, decoder_hidden, encoder_hiddens, encoder_mask):
        """
        decoder_hidden: NxH
        encoder_hiddens: NxTxH
        """
        decoder_hidden_key = F.tanh(self.decoder_linear(decoder_hidden))
        encoder_hiddens_keys = F.tanh(self.encoder_linear(encoder_hiddens))
        weights = torch.bmm(encoder_hiddens_keys, decoder_hidden_key.unsqueeze(2))
        weights = seq2seq_softmax_with_mask(weights, encoder_mask)
        return torch.bmm(encoder_hiddens.transpose(1, 2), weights.unsqueeze(2))[:,:,0]

In [10]:
class SimpleGRUEncoder(nn.Module):
    def __init__(self, alphabet, embedding_size, hidden_size):
        super(SimpleGRUEncoder, self).__init__()
        self.alphabet = alphabet
        self.embedding = nn.Embedding(num_embeddings=len(self.alphabet), embedding_dim=embedding_size)
        self.gru = nn.GRU(input_size=embedding_size, hidden_size=hidden_size, batch_first=True)
        
    def forward(self, input_sequence):
        batch_size = input_sequence.size(0)
        embeddings = self.embedding(input_sequence)
        out, _ = self.gru(embeddings)
        return out, self.alphabet.get_mask(input_sequence)

In [11]:
class SimpleGRUDecoderWithAttention(nn.Module):
    def __init__(self, alphabet, embedding_size, hidden_size):
        super(SimpleGRUDecoderWithAttention, self).__init__()
        self.hidden_size = hidden_size
        self.alphabet = alphabet
        self.embedding = nn.Embedding(num_embeddings=len(alphabet), embedding_dim=embedding_size)
        self.gru_cell = nn.GRUCell(input_size=embedding_size + hidden_size, hidden_size=hidden_size)
        self.logit_linear = nn.Linear(hidden_size, len(alphabet))
        self.attention = MultiplicativeAttentionWithMask(hidden_size, embedding_size)
        
    def init_hidden(self, batch_size):
        return Variable(torch.zeros(batch_size, self.hidden_size))
        
    def forward(self, token, prev_h, encoder_hs, encoder_mask):
        embedding = self.embedding(token)
        attention = self.attention(prev_h, encoder_hs, encoder_mask)
        #print(attention.shape, embedding.shape)
        h = self.gru_cell(torch.cat((embedding, attention), dim=1), prev_h)
        out = self.logit_linear(h)
        return out, h

In [12]:
class SimpleGRUSupervisedSeq2Seq(nn.Module):
    def __init__(self, src_alphabet, dst_alphabet, embedding_size, hidden_size):
        super(SimpleGRUSupervisedSeq2Seq, self).__init__()
        self.encoder = SimpleGRUEncoder(src_alphabet, embedding_size, hidden_size)
        self.h_linear = nn.Linear(hidden_size, hidden_size)
        self.decoder = SimpleGRUDecoderWithAttention(dst_alphabet, embedding_size, hidden_size)
        
    def start(self, batch_size):
        return Variable(torch.from_numpy(np.repeat(self.decoder.alphabet.start_index, batch_size)))
    
    '''
    def middle_layer(self, out, mask):
        #print(mask.sum(1))
        return F.tanh(self.h_linear(out[range(out.shape[0]), mask.sum(1).long() - 1]))
    '''
    
    def forward(self, input_sequence, output_sequence):
        enc_out, enc_mask = self.encoder(input_sequence)
        dec_h = self.decoder.init_hidden(input_sequence.size(0))
        logits = []
        for x in itertools.chain((self.start(output_sequence.size(0)),), output_sequence.transpose(0, 1)[:-1]):
            out, dec_h = self.decoder(x, dec_h, enc_out, enc_mask)
            logits.append(out)
        return F.log_softmax(torch.stack(logits, dim=1), dim=-1)
    
    def translate(self, word, strategy='', max_length=30):
        if isinstance(word, str):
            input_sequence = Variable(torch.from_numpy(np.array([model.encoder.alphabet.letter2index(word)])))
        elif isinstance(word, np.ndarray):
            input_sequence = Variable(torch.from_numpy(word[np.newaxis]))
        else:
            assert False, "word argument must be str or numpy array"
        #print(input_sequence.shape)
        enc_out, enc_mask = self.encoder(input_sequence)
        hidden = self.decoder.init_hidden(input_sequence.size(0))
        token = self.start(1)
        #print(token.shape, hidden.shape)
        lst = []
        for i in range(max_length):
            out, hidden = self.decoder(token, hidden, enc_out, enc_mask)
            token = out.max(1)[1]
            #print(token, out)
            lst.append(token.data[0])
            if token.data[0] == self.decoder.alphabet.end_index:
                break
        return ''.join(model.decoder.alphabet.index2letter(lst))

In [13]:
def batch_iterator(X, Y=None, batch_size=32):
    assert Y is None or X.shape[0] == Y.shape[0]
    ind = np.arange(X.shape[0])
    np.random.shuffle(ind)
    for i in range(0, X.shape[0], batch_size):
        if Y is not None:
            yield X[ind[i:i + batch_size]], Y[ind[i:i + batch_size]]
        else:
            yield X[ind[i:i + batch_size]]

In [25]:
model = SimpleGRUSupervisedSeq2Seq(ru, be, 65, 256)
opt = optim.Adam(model.parameters(), lr=1e-3)

In [26]:
model.state_dict

<bound method Module.state_dict of SimpleGRUSupervisedSeq2Seq(
  (encoder): SimpleGRUEncoder(
    (embedding): Embedding(35, 65)
    (gru): GRU(65, 256, batch_first=True)
  )
  (h_linear): Linear(in_features=256, out_features=256, bias=True)
  (decoder): SimpleGRUDecoderWithAttention(
    (embedding): Embedding(65, 65)
    (gru_cell): GRUCell(321, 256)
    (logit_linear): Linear(in_features=256, out_features=65, bias=True)
    (attention): MultiplicativeAttentionWithMask(
      (encoder_linear): Linear(in_features=256, out_features=65, bias=True)
      (decoder_linear): Linear(in_features=256, out_features=65, bias=True)
    )
  )
)>

In [27]:
def cross_entropy(log_predictions, targets, alphabet):
    """ Cross entropy loss for sequences
    Parameters
    ---------
    log_predictions: Tensor NxTxH
        Log probabilities
    targets: Tensor NxT
        True index-encoded translations
    alphabet: Alphabet
        Alphabet object
    
    """
    length_mask = alphabet.get_mask(targets)
    targets_mask = torch.zeros_like(log_predictions).scatter_(2, targets.view(*targets.shape, 1), 1.0)
    mask = targets_mask * length_mask.view(*length_mask.shape, 1)
    #print(mask.sum(1, keepdim=True).sum(2, keepdim=True))
    return (log_predictions * mask / (mask.sum(2, keepdim=True).sum(1, keepdim=True) * -log_predictions.size(0))).sum()
 

In [28]:
val_src_words = [ru.index2letter(x) for x in val_X]
val_trg_words = [be.index2letter(y) for y in val_Y]

In [29]:
import editdistance as ed
import time
import os
import nltk.translate.bleu_score as bl
from tqdm import tqdm_notebook

CHECKPOINTS = './checkpoints'

def compute_bleu_score(model, src_words, trg_words):
    return _compute_metric_average(model, src_words, trg_words, lambda x, y: bl.sentence_bleu([list(x)], list(y)))

def compute_editdistance(model, src_words, trg_words):
    return _compute_metric_average(model, src_words, trg_words, ed.eval)

def _compute_metric_average(model, src_words, trg_words, metric):
    scs = [metric(model.translate(x[:-5])[:-5], y[:-5]) for x, y in zip(tqdm_notebook(src_words), trg_words)]
    return np.mean(scs)

def train(model, opt, train_X, train_Y, val_src_words, val_trg_words, checkpoints_folder, metrics_compute_freq=50, n_epochs=7):
    cur_loss = 0
    for epoch in range(n_epochs):
        model.train()
        start_time = time.time()
        for i, (x, y) in enumerate(batch_iterator(train_X, train_Y)):
            inputs = Variable(torch.from_numpy(x))
            targets = Variable(torch.from_numpy(y))
            log_predictions = model(inputs, targets)
            #print(x)
            loss = cross_entropy(log_predictions, targets, be)
            #print(loss.data, log_predictions.data.min())
            loss.backward()
            cur_loss = 0.9 * cur_loss + 0.1 * loss.data[0]
            opt.step()
            opt.zero_grad()
            if (i + 1) % metrics_compute_freq == 0:
                print("epoch: {} iter: {} loss: {}".format(epoch, i, cur_loss))
        model.eval() 
        val_score = compute_bleu_score(model, val_src_words, val_trg_words)
        print("epoch: {} val_score: {} time: {}"
              .format(epoch, val_score, time.time() - start_time))
        torch.save(model.state_dict(), os.path.join(checkpoints_folder, "state_dict_{}_{}.pth".format(epoch, val_score)))
                
train(model, opt, train_X, train_Y, val_src_words, val_trg_words, checkpoints_folder=CHECKPOINTS, metrics_compute_freq=50, n_epochs=5)

epoch: 0 iter: 49 loss: 2.658094564029786
epoch: 0 iter: 99 loss: 1.9452701785001625
epoch: 0 iter: 149 loss: 1.5060542984939285
epoch: 0 iter: 199 loss: 1.2539703208550455
epoch: 0 iter: 249 loss: 1.1064925362440883
epoch: 0 iter: 299 loss: 1.1126237672424175
epoch: 0 iter: 349 loss: 0.9517145791588428
epoch: 0 iter: 399 loss: 0.9527058021153767
epoch: 0 iter: 449 loss: 0.944212528404
epoch: 0 iter: 499 loss: 0.9366004106275856
epoch: 0 iter: 549 loss: 0.898973423502346
epoch: 0 iter: 599 loss: 0.8774192931626411
epoch: 0 iter: 649 loss: 0.8687348401024029
epoch: 0 iter: 699 loss: 0.8806170715339428
epoch: 0 iter: 749 loss: 0.8417171325990492
epoch: 0 iter: 799 loss: 0.8754202066837747
epoch: 0 iter: 949 loss: 0.7674467000063532
epoch: 0 iter: 999 loss: 0.7958143459931212
epoch: 0 iter: 1049 loss: 0.8259128763589467
epoch: 0 iter: 1099 loss: 0.8078720349438748


Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


epoch: 1 iter: 49 loss: 0.7564559385103249
epoch: 1 iter: 99 loss: 0.6769460815427547
epoch: 1 iter: 149 loss: 0.6797774114468229
epoch: 1 iter: 199 loss: 0.7332826297063264
epoch: 1 iter: 349 loss: 0.7315959323937342
epoch: 1 iter: 399 loss: 0.6980103078134308
epoch: 1 iter: 449 loss: 0.6954559927595563
epoch: 1 iter: 499 loss: 0.6728843532871932
epoch: 1 iter: 549 loss: 0.6983245302508848
epoch: 1 iter: 599 loss: 0.6933372382867709
epoch: 1 iter: 649 loss: 0.6586113522930109
epoch: 1 iter: 699 loss: 0.681526105897865
epoch: 1 iter: 749 loss: 0.664668386590138
epoch: 1 iter: 799 loss: 0.631510930791135
epoch: 1 iter: 849 loss: 0.6717778035628991
epoch: 1 iter: 899 loss: 0.6294165461083708
epoch: 1 iter: 949 loss: 0.6365922626680415
epoch: 1 iter: 999 loss: 0.6433808709181964
epoch: 1 iter: 1049 loss: 0.6359586440487551
epoch: 1 iter: 1099 loss: 0.598834692402115



epoch: 1 val_score: 0.7112108952112097 time: 1126.3234903812408
epoch: 2 iter: 49 loss: 0.5500012394054368
epoch: 2 iter: 99 loss: 0.5719838355182909
epoch: 2 iter: 149 loss: 0.6140890254027743
epoch: 2 iter: 199 loss: 0.5894810584898776
epoch: 2 iter: 249 loss: 0.5916134667050021
epoch: 2 iter: 299 loss: 0.5911880229460058
epoch: 2 iter: 349 loss: 0.590636631945699
epoch: 2 iter: 399 loss: 0.49537212592801494
epoch: 2 iter: 449 loss: 0.5731639161831696
epoch: 2 iter: 499 loss: 0.5938712321965762
epoch: 2 iter: 549 loss: 0.642468438016881
epoch: 2 iter: 599 loss: 0.6061879933540106
epoch: 2 iter: 649 loss: 0.5905270646446676
epoch: 2 iter: 699 loss: 0.5457749712378999
epoch: 2 iter: 749 loss: 0.6195805045405616
epoch: 2 iter: 799 loss: 0.5690627256651999
epoch: 2 iter: 849 loss: 0.5704222785395039
epoch: 2 iter: 899 loss: 0.5782761578177854
epoch: 2 iter: 949 loss: 0.5897927144299605
epoch: 2 iter: 999 loss: 0.5705088882869493
epoch: 2 iter: 1049 loss: 0.5073121535080907
epoch: 2 iter

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




epoch: 2 val_score: 0.7182228578595783 time: 1124.7359964847565
epoch: 3 iter: 49 loss: 0.5347833189316005
epoch: 3 iter: 99 loss: 0.46868262541255357
epoch: 3 iter: 149 loss: 0.5451921168764445
epoch: 3 iter: 199 loss: 0.5298714926583855
epoch: 3 iter: 249 loss: 0.4974048926487826
epoch: 3 iter: 299 loss: 0.4501444181505279
epoch: 3 iter: 349 loss: 0.48753632086869236
epoch: 3 iter: 399 loss: 0.4938887831833474
epoch: 3 iter: 449 loss: 0.5047089317457525
epoch: 3 iter: 499 loss: 0.49194954649047673
epoch: 3 iter: 549 loss: 0.46731216346731214
epoch: 3 iter: 599 loss: 0.461073226839438
epoch: 3 iter: 649 loss: 0.5140152006705162
epoch: 3 iter: 699 loss: 0.48908072286892657
epoch: 3 iter: 749 loss: 0.4939158150918599
epoch: 3 iter: 799 loss: 0.5115724518267698
epoch: 3 iter: 849 loss: 0.48050468354417397
epoch: 3 iter: 899 loss: 0.45724669590551703
epoch: 3 iter: 949 loss: 0.4387043664267125
epoch: 3 iter: 999 loss: 0.46182817783749497
epoch: 3 iter: 1049 loss: 0.48179894475412166
epoc


epoch: 3 val_score: 0.7258356677385501 time: 1124.679298400879
epoch: 4 iter: 49 loss: 0.4347603522795892
epoch: 4 iter: 99 loss: 0.454316446111022
epoch: 4 iter: 149 loss: 0.41556501353764386
epoch: 4 iter: 199 loss: 0.3891953656405022
epoch: 4 iter: 249 loss: 0.4173830206069239
epoch: 4 iter: 299 loss: 0.42057612199415245
epoch: 4 iter: 349 loss: 0.41305324171667346
epoch: 4 iter: 399 loss: 0.4225158939252388
epoch: 4 iter: 449 loss: 0.4143450293980468
epoch: 4 iter: 499 loss: 0.3995721722425545
epoch: 4 iter: 549 loss: 0.4456956018866122
epoch: 4 iter: 599 loss: 0.4392767853159054
epoch: 4 iter: 649 loss: 0.4280408170834014
epoch: 4 iter: 699 loss: 0.450581477121531
epoch: 4 iter: 749 loss: 0.4603508026847052
epoch: 4 iter: 799 loss: 0.4875141328131904
epoch: 4 iter: 849 loss: 0.4900205970203743
epoch: 4 iter: 899 loss: 0.4915161546918932
epoch: 4 iter: 949 loss: 0.4764392847897999
epoch: 4 iter: 999 loss: 0.44895399348171317
epoch: 4 iter: 1049 loss: 0.4485356870621002
epoch: 4 it

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




epoch: 4 val_score: 0.7356213337141545 time: 1102.187315940857


In [74]:
class LSTMDiscriminator(nn.Module):
    def __init__(self, vocab_size, embedding_size, filters_per_ngram=10, ngrams=(3, 7), dense_layer=64):
        super(CNNDiscriminator, self).__init__()
        self.embedding = nn.Linear(vocab_size, embedding_size)
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, filters_per_ngram, (ngram, embedding_size)) for ngram in range(*ngrams)]
        )
        self.dense1 = nn.Linear(filters_per_ngram * len(self.convs), dense_layer)
        self.output = nn.Linear(dense_layer, 1)
        
    def forward(self, input_sequence):
        embedding = F.relu(self.embedding(input_sequence)).unsqueeze(0)
        out = F.relu(F.max_pool1d(torch.cat([conv(embedding) for conv in convs])))
        out = F.relu(self.dense1(out))
        return F.sigmoid(self.output(out))
        

In [85]:
def train_discriminator(disc_model, gen_model, opt, alph_X, train_X, train_Y, n_epochs=5):
    cur_loss = 0
    for epoch in range(n_epochs):
        disc_model.train()
        gen_model.eval()
        start_time = time.time()
        for i, (x, y) in enumerate(batch_iterator(train_X, train_Y)):
            inputs = Variable(alph_X.get_one_hot_repr(torch.from_numpy(x)))
            targets = Variable(torch.from_numpy(y))
            real_data_pred = disc_model(targets)
            gen_data_pred = disc_model(gen_model(inputs))
            loss = nn.BCELoss(gen_data_pred, Variable(torch.zeros_like(gen_data_pred))) \
                    + nn.BCELoss(real_data_pred, Variable(torch.ones_like(real_data_pred)))
            cur_loss = 0.9 * cur_loss + 0.1 * loss.data[0]
            loss.backward()
            opt.step()
            opt.zero_grad()
            if i % 50 == 49:
                print(loss)

In [86]:
disc = CNNDiscriminator(len(be), 65, 10)
disc_opt = optim.Adam(disc.parameters(), lr=1e-3)

In [88]:
train_discriminator(disc, model, disc_opt, ru, train_X, train_Y)

RuntimeError: Expected object of type Variable[torch.FloatTensor] but found type Variable[torch.LongTensor] for argument #1 'mat1'

In [None]:
model.translate("полемики")

In [None]:
val_src_words[1]

In [None]:
val_trg_words[1]

In [None]:
for x, y in zip(val_src_words[:10], val_trg_words):
    tr = model.translate(x[:-5])[:-5]
    y = y[:-5]
    
    print(tr, y, ed.eval(tr, y))

In [None]:
translate(model, "")

In [None]:
from tqdm import tqdm_notebook
import editdistance as ed


scs = []
with open(TEST_FILE, "r") as ftr:
    for ruw, bew in map(lambda x: x.split(), 
                        filter(lambda x: len(x.split()) == 2, tqdm_notebook(ftr.readlines()[:2000]))):
        res = model.translate(ruw)
        scs.append(ed.eval(bew, res[:-5]))
        print(ruw, bew, res)
    
        

In [None]:
! pip3 install --user editdistance

In [None]:
np.mean(scs)

In [None]:
! head -n 50 data/ru-be.txt