In [16]:
import tensorflow as tf
from data_prepare import prepareData, Lang
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm_notebook

tf.enable_eager_execution()

In [17]:
import random
import numpy as np

В качестве датасета возьмем корпус пар, состоящих из фраз на английском и французском языках, взятых с https://www.manythings.org/anki/

Функции для предобработки датасета находятся в data_prepare.py в директории с этим ноутбуком

In [21]:
# Maximum sentence length in tokens (the same value is assigned again in a later cell).
MAX_LENGTH = 10

# English sentence openers. The contraction forms ("i m ", "he s ", ...) end with a
# space so that, when used as a prefix filter, they match only the whole token:
# without the space "she s" would also match "she said", "she sings", etc.
# NOTE(review): this tuple is not passed to prepareData() in this notebook; the
# filter actually applied presumably lives in data_prepare.py - confirm it carries
# the same "she s " fix.
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)

In [22]:
# Load the corpus and build both vocabularies plus the list of sentence pairs.
# NOTE(review): the third argument presumably reverses the pair direction (the names
# printed in the next cell are fra -> eng) - confirm in data_prepare.py.
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
# Show one random pair as a quick sanity check of the preprocessing.
print(repr(random.choice(pairs)))

Reading lines...
Read 135842 sentence pairs
Trimmed to 10853 sentence pairs
Counting words...
Counted words:
fra 4490
eng 2926
['je suis conscient de mes responsabilites .', 'i m aware of my responsibilities .']


In [23]:
# Confirm the translation direction by printing the source and target language names.
print('input lang - %s' % input_lang.name)
print('output lang - %s' % output_lang.name)

input lang - fra
output lang - eng


$\textbf{Preparing Training Data}$

Преобразуем датасет с оригинальными строками на французском и их переводом на английский в пары из тензоров.

Единственное, на чем здесь можно акцентировать внимание, — это то, что я немного изменил функции из туториала так, чтобы можно было сделать батчи с `фиксированными` длинами строк: благодаря функции `pad_sequences` строки с длиной меньше, чем фиксированный размер, будут дополняться специальным токеном `UNDEF_token='_'`, а строки большей длины будут обрезаться.

In [25]:
# Reserved vocabulary indices shared by the helpers below.
SOS_token = 1    # start-of-sentence index (the decoder starts from index 1)
EOS_token = 2    # appended to every encoded sentence by tensorFromSentence
UNDEF_token = 0  # padding index; same value as the `value=0` passed to pad_sequences
MAX_LENGTH = 10  # every encoded sentence is padded to this many positions


def indexesFromSentence(lang, sentence):
    """Translate a sentence into the list of its vocabulary indices.

    Args:
        lang: object exposing a ``word2index`` mapping from token to index.
        sentence: string whose tokens are separated by single spaces.

    Returns:
        A new list with one ``lang.word2index`` entry per token, in order.

    Raises:
        Whatever ``lang.word2index[token]`` raises for an unknown token
        (``KeyError`` for a plain dict).
    """
    indices = []
    for token in sentence.split(' '):
        indices.append(lang.word2index[token])
    return indices


def tensorFromSentence(lang, sentence):
    """Encode one sentence as a fixed-length int64 tensor.

    The sentence is mapped to vocabulary indices, EOS_token is appended, and
    the result is right-padded with 0 up to MAX_LENGTH positions. Sequences
    longer than MAX_LENGTH are cut by ``pad_sequences`` using its default
    truncation mode (not overridden here).

    Args:
        lang: vocabulary object accepted by ``indexesFromSentence``.
        sentence: space-separated sentence string.

    Returns:
        A 1-D ``tf.int64`` tensor holding the padded index sequence.
    """
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    padded = pad_sequences([token_ids], maxlen=MAX_LENGTH, value=0, padding='post')
    # pad_sequences works on a batch; we passed exactly one sequence, so take row 0.
    only_row = padded[0]
    return tf.constant(only_row, dtype=tf.int64)


def tensorsFromPair(pair):
    """Encode a (source sentence, target sentence) pair as two tensors.

    Args:
        pair: indexable whose element 0 is the input-language sentence and
            element 1 the output-language sentence.

    Returns:
        Tuple ``(input_tensor, target_tensor)`` built with ``tensorFromSentence``
        using the module-level ``input_lang`` and ``output_lang`` vocabularies.
    """
    return (
        tensorFromSentence(input_lang, pair[0]),
        tensorFromSentence(output_lang, pair[1]),
    )

$\textbf{Making dataset with tf.Data}$

In [184]:
BATCH_SIZE = 100

# Build the dataset of batches that the training loop iterates over.
# drop_remainder=True discards the last, incomplete batch: the encoder and decoder
# create their initial states for exactly BATCH_SIZE rows.
#
# Each batch holds pairs of index sequences: one for the input vocabulary and one
# for the output vocabulary.
#
# The shuffle buffer covers the whole dataset. With a buffer smaller than the
# dataset only a sliding window is shuffled, so consecutive batches stay dominated
# by neighbouring (similar) sentences from the source ordering.

tf_pairs = list(map(tensorsFromPair, pairs))
dataset = tf.data.Dataset.from_tensor_slices(tf_pairs)\
                         .shuffle(len(tf_pairs))\
                         .batch(BATCH_SIZE, drop_remainder=True)

In [185]:
# Peek at the first two batches to check their layout (batch, 2, sequence length).
for i in dataset.take(2):
    print(i)

tf.Tensor(
[[[  7  28  83 ...   0   0   0]
  [  3   4  58 ...   0   0   0]]

 [[  7  12 298 ...   0   0   0]
  [  3  17 184 ...   0   0   0]]

 [[  7  12 183 ...   0   0   0]
  [  3   4 117 ...   0   0   0]]

 ...

 [[  7  12 448 ...   0   0   0]
  [  3   4 259 ...   0   0   0]]

 [[  3   4 455 ...   2   0   0]
  [  3   4 266 ...   0   0   0]]

 [[  7  12 305 ...   0   0   0]
  [  3   4 191 ...   0   0   0]]], shape=(100, 2, 10), dtype=int64)
tf.Tensor(
[[[  7 299  12 ...   2   0   0]
  [  3   4 185 ...   0   0   0]]

 [[  7  12 115 ...   0   0   0]
  [  3   4  74 ...   0   0   0]]

 [[  7  12 313 ...   0   0   0]
  [  3   4 197 ...   0   0   0]]

 ...

 [[  7 299 496 ...   2   0   0]
  [  3   4 289 ...   0   0   0]]

 [[124 127 578 ...   0   0   0]
  [ 78  79 112 ...   0   0   0]]

 [[124 127 804 ...   0   0   0]
  [ 78  79 196 ...   0   0   0]]], shape=(100, 2, 10), dtype=int64)


$\textbf{Model Construction}$

    class EncoderRNN()---+
                         |
                         |           
                         +---+--->  class Seq2seqModel()
                             |
                             |
    class DecoderRNN(object)-+


In [145]:
class EncoderRNN(object):
    """Single-layer LSTM encoder over embedded token indices.

    Owns an embedding matrix of shape [vocab_size, embedding_size] and an LSTM
    cell with ``hidden_size`` units. The batch size is fixed at construction
    because it is used to build the zero initial state in ``forward``.
    """

    def __init__(self, hidden_size, embedding_size, vocab_size, batch_size):
        """Create the embedding matrix and the LSTM cell.

        Args:
            hidden_size: number of units of the LSTM cell.
            embedding_size: width of one embedding vector.
            vocab_size: number of rows of the embedding matrix.
            batch_size: number of sequences expected in every ``forward`` call.
        """
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.vocab_size = vocab_size
        self.batch_size = batch_size

        self.EmbeddingMTX = tf.get_variable('embedding_matrix',
                                            shape=[vocab_size, embedding_size],
                                            dtype=tf.float32)
        self.LSTM = tf.nn.rnn_cell.LSTMCell(num_units=self.hidden_size)

    def forward(self, input_tensor, emb_matrix=None):
        """Run the encoder over a batch of index sequences.

        Args:
            input_tensor: integer tensor of token indices, expected as
                [batch_size, sequence length].
            emb_matrix: optional embedding matrix to use instead of the
                encoder's own (e.g. one shared with the decoder).

        Returns:
            ``(output, state)`` exactly as returned by ``tf.nn.dynamic_rnn``.
        """
        # Compare against None explicitly: taking the truth value of a tensor or
        # variable (`not emb_matrix`) raises for any real embedding matrix.
        if emb_matrix is None:
            emb_matrix = self.EmbeddingMTX

        # zero initial state for the LSTM
        initial_state = self.LSTM.zero_state(self.batch_size,
                                             dtype=tf.float32)

        # embedding lookup input: [batch_size, sequence length (indices)]
        embedded = tf.nn.embedding_lookup(emb_matrix, input_tensor)

        # dynamic LSTM input: [batch_size, time steps, embedding_size]
        output, state = tf.nn.dynamic_rnn(self.LSTM,
                                          inputs=embedded,
                                          initial_state=initial_state,
                                          dtype=tf.float32)
        return output, state


In [146]:
class DecoderRNN(object):
    """Greedy LSTM decoder that emits ``len_of_prediction`` tokens per sequence.

    At every step the previously predicted token (starting from index 1, the
    start-of-sentence index) is embedded, fed through the LSTM cell and a dense
    layer, and the arg-max of the resulting distribution becomes the next input.
    Target tokens are never fed in, i.e. there is no teacher forcing.
    """

    def __init__(self, batch_size, word2index, index2word, len_of_prediction, hidden_size, embedding_size):
        """Create the embedding matrix, the LSTM cell and the output projection.

        Args:
            batch_size: number of sequences decoded in parallel.
            word2index: token -> index mapping of the output vocabulary (stored only).
            index2word: index -> token mapping; its length defines the vocabulary size.
            len_of_prediction: number of decoding steps.
            hidden_size: number of units of the LSTM cell.
            embedding_size: width of one embedding vector.
        """
        self.word2index = word2index
        self.index2word = index2word
        self.len_of_prediction = len_of_prediction
        self.hidden_size = hidden_size
        self.batch_size = batch_size

        self.EmbeddingMTX = tf.get_variable('embedding_matrix',
                                            shape=[len(self.index2word), embedding_size],
                                            dtype=tf.float32)

        self.LSTM = tf.nn.rnn_cell.LSTMCell(num_units=self.hidden_size)
        self.word_predictor = tf.layers.Dense(len(self.index2word),
                                                 activation=None)

    def forward(self, sos = 'SOS', state=None, emb_matrix=None):
        """Decode ``len_of_prediction`` steps greedily.

        Args:
            sos: kept for interface compatibility; currently unused because the
                start index is hard-coded to 1.
            state: initial LSTM state (typically the encoder's final state);
                a zero state is used when omitted.
            emb_matrix: optional embedding matrix to use instead of the
                decoder's own.

        Returns:
            ``(outputs, words_probs)`` where ``outputs`` stacks the LSTM outputs
            as [batch_size, len_of_prediction, hidden_size] and ``words_probs``
            stacks the per-step softmax distributions as
            [batch_size, len_of_prediction, vocabulary_size].
        """
        # Compare against None explicitly: taking the truth value of a tensor or
        # variable (`not emb_matrix`) raises for any real embedding matrix.
        if emb_matrix is None:
            emb_matrix = self.EmbeddingMTX

        if state is None:
            state = self.LSTM.zero_state(self.batch_size, dtype=tf.float32)

        # index of <SOS>; hard-coded instead of self.word2index[sos]
        ind = 1
        output = tf.convert_to_tensor([[ind]] * self.batch_size,
                                      dtype=tf.int32)

        words_probs, outputs = [], []

        for step in range(self.len_of_prediction):
            # [batch_size, 1] indices -> [batch_size, embedding_size]
            embedded = tf.nn.embedding_lookup(emb_matrix, output)[:,0,:]

            # one LSTM step
            output, state = self.LSTM(embedded, state)
            outputs.append(output)

            # dense layer: [batch_size, hidden_size] -> [batch_size, vocabulary_size]
            logits = self.word_predictor(output)
            probs = tf.nn.softmax(logits)
            words_probs.append(probs)

            # greedy choice becomes the next input; stays a tensor, no NumPy round-trip
            pred_word = tf.argmax(probs, 1)
            output = tf.reshape(pred_word, [self.batch_size, 1])

        # list of [batch_size, vocabulary_size]
        # -> [batch_size, len_of_prediction, vocabulary_size]
        words_probs = tf.stack(words_probs, axis=1)

        # list of [batch_size, hidden_size]
        # -> [batch_size, len_of_prediction, hidden_size]
        outputs = tf.stack(outputs, 1)

        return outputs, words_probs
    

In [152]:
# This is a bit of a hack: a small fixed sample of phrases used to see how the model
# behaves on data from the dataset while it is training.
# Seq2seqModel.train reads these two module-level lists directly.

# French source phrases fed to the model after every epoch.
input_test = ['je suis gros .', 
              'c est un scientifique .', 
              'nous sommes amies .', 
              'je suis en train de conduire .', 
              'tu as presque raison .']
# Reference English translations, index-aligned with input_test.
output_test = ['i m fat .', 
               'he is a scientist .',
               'we re friends .', 
               'i m driving .',
               'you re about right .']

In [156]:
class Seq2seqModel(object):
    """Encoder-decoder translation model built from EncoderRNN and DecoderRNN.

    The encoder's final LSTM state initialises the decoder, which then decodes
    ``len_of_prediction`` tokens greedily. Training minimises the cross-entropy
    between the decoder's per-step distributions and the one-hot targets.
    """

    def __init__(self, input_lang, output_lang, batch_size, hidden_size,
                 len_of_prediction, embedding_size):
        """Store the hyper-parameters and build the encoder and decoder.

        Args:
            input_lang: source vocabulary; ``n_words`` and ``word2index`` are read.
            output_lang: target vocabulary; ``n_words``, ``word2index`` and
                ``index2word`` are read.
            batch_size: fixed batch size used by both sub-networks.
            hidden_size: number of LSTM units in encoder and decoder.
            len_of_prediction: number of tokens the decoder emits.
            embedding_size: width of the embedding vectors.
        """
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.input_lang = input_lang
        self.output_lang = output_lang
        self.embedding_size = embedding_size
        self.len_of_prediction = len_of_prediction

        self.init_encoder = {
            'hidden_size' : self.hidden_size,
            'embedding_size' : self.embedding_size,
            'vocab_size' : self.input_lang.n_words,
            'batch_size' : self.batch_size
        }
        self.init_decoder = {
            'batch_size' : self.batch_size,
            'word2index' : self.output_lang.word2index,
            'index2word' : self.output_lang.index2word,
            'len_of_prediction' : self.len_of_prediction,
            'hidden_size' : self.hidden_size,
            'embedding_size' : self.embedding_size
        }
        self.encoder = EncoderRNN(**self.init_encoder)
        self.decoder = DecoderRNN(**self.init_decoder)

    def train(self, dataset, epochs=300):
        """Train with Adam and print progress plus sample translations.

        Args:
            dataset: iterable of batches; each batch is unstacked along axis 1
                into the source and target index tensors.
            epochs: number of passes over ``dataset``.

        After every epoch the mean batch loss is printed together with the
        model's translations of the module-level ``input_test`` phrases next to
        the references in ``output_test``.
        """
        optimizer = tf.train.AdamOptimizer()

        # Batches per epoch, used only for the progress bar. Unknown until the
        # dataset has been iterated once, so the first epoch shows a bare counter
        # instead of relying on a hard-coded dataset size.
        batches_per_epoch = None

        for epoch in range(epochs):
            print('================================')
            print('Epoch: {}'.format(epoch))
            losses = []
            for j, item in enumerate(tqdm_notebook(dataset, total=batches_per_epoch)):
                X_batch, Y_batch = tf.unstack(item, axis = 1)

                # Single forward pass per batch: the recorded loss is used both
                # for the update and for logging (previously the model ran twice).
                with tf.GradientTape() as tape:
                    loss = self._get_loss(X_batch, Y_batch)
                variables = tape.watched_variables()
                gradients = tape.gradient(loss, variables)
                optimizer.apply_gradients(zip(gradients, variables))

                loss_value = loss.numpy()
                losses.append(loss_value)

                if j % 30 == 0:
                    print('Loss {}'.format(loss_value))

            batches_per_epoch = len(losses)

            print('========================')
            print('Loss per epoch: {}'.format(np.mean(losses)))
            print('Example:\n')
            for j, i in enumerate(input_test):
                print('> %s\n= %s\n< %s\n\n' % (i, self.predict_by_phrase(i), output_test[j]))

    def _get_loss(self, X_batch, Y_batch):
        """Cross-entropy of the decoder output against ``Y_batch``.

        Args:
            X_batch: source index tensor passed to the encoder.
            Y_batch: target index tensor; one-hot encoded with depth
                ``output_lang.n_words``.

        Returns:
            Scalar tensor: the summed cross-entropy divided by the product of
            the first two dimensions (batch size x sequence length). All
            positions, including padding, contribute.
        """
        _, state = self.encoder.forward(X_batch)
        _, pred = self.decoder.forward(state=state)
        target = tf.one_hot(Y_batch, depth=self.output_lang.n_words)

        # Clip the probabilities away from 0 so that the log stays finite.
        cross_entropy = target*tf.log(tf.clip_by_value(pred, 1e-10, 1))
        cost_sum = -tf.reduce_sum(cross_entropy)

        # Read the normaliser from the static shape; no need to copy the whole
        # tensor to NumPy just to learn its dimensions.
        n_rows, n_steps = cross_entropy.shape.as_list()[:2]
        return cost_sum / (n_rows * n_steps)

    def _tensorFromSentence(self, sentence):
        """Encode one source sentence and replicate it to a full batch.

        The encoder expects exactly ``batch_size`` rows, so the same padded
        index sequence is repeated ``batch_size`` times.

        Args:
            sentence: space-separated source sentence; every token must be a
                key of ``input_lang.word2index``.

        Returns:
            ``tf.int64`` tensor of shape [batch_size, MAX_LENGTH].
        """
        indexes = [self.input_lang.word2index[word] for word in sentence.split(' ')]
        indexes.append(2)  # end-of-sentence index (same value as EOS_token)
        indexes = pad_sequences([indexes for _ in range(self.batch_size)],
                                maxlen=MAX_LENGTH, value=0, padding='post')
        return tf.constant(indexes, dtype=tf.int64)

    def predict_by_phrase(self, sentence='elle est tranquille .'):
        """Translate one sentence and return the decoded tokens as a string.

        Args:
            sentence: space-separated source sentence.

        Returns:
            The ``len_of_prediction`` predicted tokens joined by spaces; the
            end-of-sentence and padding tokens are not stripped.
        """
        input_tensor = self._tensorFromSentence(sentence)

        _, state = self.encoder.forward(input_tensor)
        _, pred = self.decoder.forward(state=state)

        # All rows of the batch are identical copies; read the first one.
        pred_indexes = tf.argmax(pred, 2).numpy()[0]
        return ' '.join([self.output_lang.index2word[word] for word in pred_indexes])


Проинициализируем модель, передав ей все параметры:

In [158]:
# Build the model. batch_size has to equal the dataset's BATCH_SIZE because the
# encoder and decoder create their zero states for a fixed number of rows.
seq2seq = Seq2seqModel(input_lang, output_lang, batch_size=BATCH_SIZE, hidden_size=512, 
                       len_of_prediction=MAX_LENGTH, embedding_size=300)


Теперь обучим проинициализированную модель на подготовленном датасете, вызвав метод train.
Все результаты внизу после обучения.

In [170]:
# Train for 40 epochs; the per-epoch loss and sample translations are printed below.
seq2seq.train(dataset, epochs=40)

Epoch: 0


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 6.312170028686523
Loss 2.1998608112335205
Loss 2.626636028289795
Loss 3.5726735591888428
Loss per epoch: 2.854418992996216
Example:

> je suis gros .
= i m not to to to . . EOS _
< i m fat .


> c est un scientifique .
= i m not to to . . . EOS _
< he is a scientist .


> nous sommes amies .
= i m not to to . . . EOS _
< we re friends .


> je suis en train de conduire .
= i m not to to to . . EOS _
< i m driving .


> tu as presque raison .
= i m not to to . . . EOS _
< you re about right .


Epoch: 1


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 4.0309953689575195
Loss 1.8843371868133545
Loss 2.4437544345855713
Loss 3.3427300453186035
Loss per epoch: 2.559222459793091
Example:

> je suis gros .
= i m not to to the . . EOS _
< i m fat .


> c est un scientifique .
= i m not to the . . . EOS _
< he is a scientist .


> nous sommes amies .
= i m not to the . . EOS _ _
< we re friends .


> je suis en train de conduire .
= i m not to to the . . EOS _
< i m driving .


> tu as presque raison .
= i m not to the . . EOS _ _
< you re about right .


Epoch: 2


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 3.6467111110687256
Loss 1.7643864154815674
Loss 2.4528543949127197
Loss 3.4459102153778076
Loss per epoch: 2.4927592277526855
Example:

> je suis gros .
= i m not to to . . . EOS _
< i m fat .


> c est un scientifique .
= i re not to to . . EOS _ _
< he is a scientist .


> nous sommes amies .
= i re not to . . . EOS _ _
< we re friends .


> je suis en train de conduire .
= i m not to to . . . EOS _
< i m driving .


> tu as presque raison .
= i re not to to . . EOS _ _
< you re about right .


Epoch: 3


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 3.3401951789855957
Loss 1.7466243505477905
Loss 2.4311885833740234
Loss 3.1552302837371826
Loss per epoch: 2.4109437465667725
Example:

> je suis gros .
= i is not to to . . . EOS _
< i m fat .


> c est un scientifique .
= you re not to the . . EOS _ _
< he is a scientist .


> nous sommes amies .
= you re not to . . EOS _ _ _
< we re friends .


> je suis en train de conduire .
= i m not to to . . . EOS _
< i m driving .


> tu as presque raison .
= you re not to . . EOS _ _ _
< you re about right .


Epoch: 4


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 3.204733371734619
Loss 1.5794230699539185
Loss 2.1220712661743164
Loss 3.0040535926818848
Loss per epoch: 2.2928740978240967
Example:

> je suis gros .
= i m not to to . . EOS _ _
< i m fat .


> c est un scientifique .
= he is not to . . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= you re very to . . EOS _ _ _
< we re friends .


> je suis en train de conduire .
= i m not to to . . EOS _ _
< i m driving .


> tu as presque raison .
= you re very to . . EOS _ _ _
< you re about right .


Epoch: 5


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 2.8363358974456787
Loss 1.4489213228225708
Loss 2.0435266494750977
Loss 2.8943278789520264
Loss per epoch: 2.1296026706695557
Example:

> je suis gros .
= i m not to to . . EOS _ _
< i m fat .


> c est un scientifique .
= he is a to . . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re very to . . EOS _ _ _
< we re friends .


> je suis en train de conduire .
= i m not to to . . EOS _ _
< i m driving .


> tu as presque raison .
= you re very to . . EOS _ _ _
< you re about right .


Epoch: 6


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 2.733501672744751
Loss 1.4531934261322021
Loss 2.082425355911255
Loss 2.8252499103546143
Loss per epoch: 2.0831027030944824
Example:

> je suis gros .
= i m not to . . . EOS _ _
< i m fat .


> c est un scientifique .
= he is a to the . . EOS _ _
< he is a scientist .


> nous sommes amies .
= we are very . . EOS EOS _ _ _
< we re friends .


> je suis en train de conduire .
= i m not to to . . EOS _ _
< i m driving .


> tu as presque raison .
= you re very to . . EOS _ _ _
< you re about right .


Epoch: 7


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 2.627941846847534
Loss 1.334791898727417
Loss 2.0984623432159424
Loss 2.7370307445526123
Loss per epoch: 1.9890564680099487
Example:

> je suis gros .
= i m not to . . . EOS _ _
< i m fat .


> c est un scientifique .
= he is always to the . . EOS _ _
< he is a scientist .


> nous sommes amies .
= we re very . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m not to to you . EOS _ _
< i m driving .


> tu as presque raison .
= you re very very . . EOS _ _ _
< you re about right .


Epoch: 8


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 2.164961338043213
Loss 1.3301985263824463
Loss 1.9876543283462524
Loss 2.6555659770965576
Loss per epoch: 1.9222880601882935
Example:

> je suis gros .
= i m very with . . EOS _ _ _
< i m fat .


> c est un scientifique .
= he is a to his . . EOS _ _
< he is a scientist .


> nous sommes amies .
= we re very . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m not to to . . EOS _ _
< i m driving .


> tu as presque raison .
= you re very very . . EOS _ _ _
< you re about right .


Epoch: 9


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 2.1082756519317627
Loss 1.2642173767089844
Loss 1.9085196256637573
Loss 2.6723177433013916
Loss per epoch: 1.866188645362854
Example:

> je suis gros .
= i m very . . EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a to the . . EOS _ _
< he is a scientist .


> nous sommes amies .
= we re very . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m not to with . . EOS _ _
< i m driving .


> tu as presque raison .
= you re not very . . EOS _ _ _
< you re about right .


Epoch: 10


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.768976092338562
Loss 1.3168013095855713
Loss 1.723635196685791
Loss 2.6870319843292236
Loss per epoch: 1.8156710863113403
Example:

> je suis gros .
= i m very . . EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a the the . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re very . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m not to of . . EOS _ _
< i m driving .


> tu as presque raison .
= you re very sophisticated . . EOS _ _ _
< you re about right .


Epoch: 11


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.6950764656066895
Loss 1.203361988067627
Loss 1.7560185194015503
Loss 2.446746826171875
Loss per epoch: 1.756696105003357
Example:

> je suis gros .
= i m very . . . EOS _ _ _
< i m fat .


> c est un scientifique .
= he is a of the . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re all . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to your . . EOS _ _
< i m driving .


> tu as presque raison .
= you re not very . . EOS _ _ _
< you re about right .


Epoch: 12


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.6133925914764404
Loss 1.215316891670227
Loss 1.7621142864227295
Loss 2.3482279777526855
Loss per epoch: 1.7037354707717896
Example:

> je suis gros .
= i m being . EOS EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a . . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re all . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i am very of of . . EOS _ _
< i m driving .


> tu as presque raison .
= you re very very . . EOS _ _ _
< you re about right .


Epoch: 13


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.5415149927139282
Loss 1.1673229932785034
Loss 1.6745229959487915
Loss 2.2326722145080566
Loss per epoch: 1.6383352279663086
Example:

> je suis gros .
= i m being . EOS EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a . . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re all . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m very of your . . EOS _ _
< i m driving .


> tu as presque raison .
= you re very sophisticated . . EOS _ _ _
< you re about right .


Epoch: 14


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.4169020652770996
Loss 1.067388653755188
Loss 1.5800566673278809
Loss 2.0677645206451416
Loss per epoch: 1.5728133916854858
Example:

> je suis gros .
= i m being . EOS EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a . . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re all friends . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i am a of your . . EOS _ _
< i m driving .


> tu as presque raison .
= you re very sophisticated . . EOS _ _ _
< you re about right .


Epoch: 15


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.4337666034698486
Loss 1.0972259044647217
Loss 1.4944225549697876
Loss 2.1536645889282227
Loss per epoch: 1.5123423337936401
Example:

> je suis gros .
= i m being . EOS EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re all . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i am a of your . . EOS _ _
< i m driving .


> tu as presque raison .
= you re not very . . EOS _ _ _
< you re about right .


Epoch: 16


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.2842479944229126
Loss 1.0664513111114502
Loss 1.3847533464431763
Loss 1.9916343688964844
Loss per epoch: 1.4515280723571777
Example:

> je suis gros .
= i m being . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re all . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i am a of the . . EOS _ _
< i m driving .


> tu as presque raison .
= you re not being . . EOS _ _ _
< you re about right .


Epoch: 17


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.3398692607879639
Loss 1.08298659324646
Loss 1.3903230428695679
Loss 1.8367851972579956
Loss per epoch: 1.3841880559921265
Example:

> je suis gros .
= i m being . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re all . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i am in of your . . EOS _ _
< i m driving .


> tu as presque raison .
= you re too very . . EOS _ _ _
< you re about right .


Epoch: 18


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.3593722581863403
Loss 0.9335301518440247
Loss 1.353674054145813
Loss 1.6292634010314941
Loss per epoch: 1.3168004751205444
Example:

> je suis gros .
= i m being . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re all . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i am in the my . . EOS _ _
< i m driving .


> tu as presque raison .
= you re always complaining . . EOS _ _ _
< you re about right .


Epoch: 19


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.0994900465011597
Loss 0.7638909816741943
Loss 1.25179123878479
Loss 1.6325719356536865
Loss per epoch: 1.2383722066879272
Example:

> je suis gros .
= i m an . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re all . . EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i am looking for the . . EOS _ _
< i m driving .


> tu as presque raison .
= you re always complaining . EOS _ _ _ _
< you re about right .


Epoch: 20


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.1712243556976318
Loss 0.8337421417236328
Loss 1.2146272659301758
Loss 1.6266124248504639
Loss per epoch: 1.1823992729187012
Example:

> je suis gros .
= i m tired . EOS EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m looking for the . . EOS _ _
< i m driving .


> tu as presque raison .
= you re always here . . EOS _ _ _
< you re about right .


Epoch: 21


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.1028287410736084
Loss 0.7996562719345093
Loss 1.053290605545044
Loss 1.4364688396453857
Loss per epoch: 1.1097334623336792
Example:

> je suis gros .
= i m very . EOS EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a a student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m looking for the . . EOS _ _
< i m driving .


> tu as presque raison .
= you re always complaining . EOS EOS _ _ _
< you re about right .


Epoch: 22


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 1.0194745063781738
Loss 0.6988573670387268
Loss 1.1156604290008545
Loss 1.2926090955734253
Loss per epoch: 1.0411356687545776
Example:

> je suis gros .
= i m an . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a freelance student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re in . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to the . . EOS _ _
< i m driving .


> tu as presque raison .
= you re always complaining . EOS _ _ _ _
< you re about right .


Epoch: 23


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.9582945108413696
Loss 0.7473098635673523
Loss 1.1715102195739746
Loss 1.1963865756988525
Loss per epoch: 0.9783791303634644
Example:

> je suis gros .
= i m very . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a freelance student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re in . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m looking for the . . EOS _ _
< i m driving .


> tu as presque raison .
= you re always complaining . EOS _ _ _ _
< you re about right .


Epoch: 24


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.9657268524169922
Loss 0.7067056894302368
Loss 0.9006253480911255
Loss 1.1495217084884644
Loss per epoch: 0.9069824814796448
Example:

> je suis gros .
= i m very . EOS EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a freelance student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re trapped . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to the . EOS EOS _ _
< i m driving .


> tu as presque raison .
= you re always complaining . . EOS _ _ _
< you re about right .


Epoch: 25


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.7826858758926392
Loss 0.5947918891906738
Loss 0.834660530090332
Loss 1.173421025276184
Loss per epoch: 0.8385549187660217
Example:

> je suis gros .
= i m very . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a freelance student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re in . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m looking for the . . EOS _ _
< i m driving .


> tu as presque raison .
= you re always complaining . EOS _ _ _ _
< you re about right .


Epoch: 26


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.7655611038208008
Loss 0.6252096891403198
Loss 0.7750071287155151
Loss 1.0394052267074585
Loss per epoch: 0.7698559761047363
Example:

> je suis gros .
= i m very . EOS EOS _ _ _ _
< i m fat .


> c est un scientifique .
= he is a freelance journalist . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re in . EOS EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to the . EOS EOS _ _
< i m driving .


> tu as presque raison .
= you re always complaining . EOS _ _ _ _
< you re about right .


Epoch: 27


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.6957069039344788
Loss 0.5188117623329163
Loss 0.7693923711776733
Loss 0.9341332912445068
Loss per epoch: 0.7042502164840698
Example:

> je suis gros .
= i m yours . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a teacher artist . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m looking for the . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re always right . EOS _ _ _ _
< you re about right .


Epoch: 28


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.6935040950775146
Loss 0.4584909677505493
Loss 0.6253865957260132
Loss 0.8046034574508667
Loss per epoch: 0.6452956795692444
Example:

> je suis gros .
= i m very . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a freelance journalist . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m looking for batteries . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re right something . . EOS _ _ _
< you re about right .


Epoch: 29


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.6319496035575867
Loss 0.4485231935977936
Loss 0.4791713058948517
Loss 0.7680400609970093
Loss per epoch: 0.5840458273887634
Example:

> je suis gros .
= i m fat . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a freelance student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m looking for the . EOS EOS _ _
< i m driving .


> tu as presque raison .
= you re right right . EOS _ _ _ _
< you re about right .


Epoch: 30


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.6224503517150879
Loss 0.4123295247554779
Loss 0.5884775519371033
Loss 0.7041894793510437
Loss per epoch: 0.5288646817207336
Example:

> je suis gros .
= i m tough . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a mathematical artist . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to get . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re right right right . EOS _ _ _
< you re about right .


Epoch: 31


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.6185435056686401
Loss 0.3608451187610626
Loss 0.44800928235054016
Loss 0.5431381464004517
Loss per epoch: 0.47531044483184814
Example:

> je suis gros .
= i m tough . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a mathematical student . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS EOS _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to get . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re right right right . EOS _ _ _
< you re about right .


Epoch: 32


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.40786606073379517
Loss 0.31696417927742004
Loss 0.377921462059021
Loss 0.6086967587471008
Loss per epoch: 0.4235711693763733
Example:

> je suis gros .
= i m fat . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a mathematical genius . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to get . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re right right right . EOS _ _ _
< you re about right .


Epoch: 33


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.3917480409145355
Loss 0.29794785380363464
Loss 0.3476254940032959
Loss 0.4154069423675537
Loss per epoch: 0.37794604897499084
Example:

> je suis gros .
= i m stuck . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a mathematical scientist . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m getting to get . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re right right right . EOS _ _ _
< you re about right .


Epoch: 34


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.4460071623325348
Loss 0.2767294943332672
Loss 0.3549381196498871
Loss 0.4017915725708008
Loss per epoch: 0.335326224565506
Example:

> je suis gros .
= i m fat . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a scientist . . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i am going to get . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re almost right right . EOS _ _ _
< you re about right .


Epoch: 35


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.42313775420188904
Loss 0.18551886081695557
Loss 0.3671933710575104
Loss 0.3823480010032654
Loss per epoch: 0.2938053607940674
Example:

> je suis gros .
= i m fat . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a mathematical genius . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m getting . . . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re right right right . EOS _ _ _
< you re about right .


Epoch: 36


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.32524192333221436
Loss 0.1555938422679901
Loss 0.2430633306503296
Loss 0.32945874333381653
Loss per epoch: 0.25955483317375183
Example:

> je suis gros .
= i m fat . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a mathematical genius . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to get . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re almost right . EOS _ _ _ _
< you re about right .


Epoch: 37


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.25091439485549927
Loss 0.12063122540712357
Loss 0.19824109971523285
Loss 0.28270405530929565
Loss per epoch: 0.23243795335292816
Example:

> je suis gros .
= i m fat . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a scientist . EOS _ _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to EOS . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re almost right right . EOS _ _ _
< you re about right .


Epoch: 38


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.2600648105144501
Loss 0.19470494985580444
Loss 0.13638757169246674
Loss 0.2524903118610382
Loss per epoch: 0.20569764077663422
Example:

> je suis gros .
= i m fat . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a mathematical genius . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to EOS . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re right right right . EOS _ _ _
< you re about right .


Epoch: 39


HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Loss 0.1932673305273056
Loss 0.16346164047718048
Loss 0.2081863284111023
Loss 0.1726379096508026
Loss per epoch: 0.18129579722881317
Example:

> je suis gros .
= i m fat . EOS _ _ _ _ _
< i m fat .


> c est un scientifique .
= he is a mathematical scientist . EOS _ _ _
< he is a scientist .


> nous sommes amies .
= we re friends . EOS _ _ _ _ _
< we re friends .


> je suis en train de conduire .
= i m going to EOS . EOS _ _ _
< i m driving .


> tu as presque raison .
= you re almost right . EOS _ _ _ _
< you re about right .




Теперь попробуем перевести с помощью нашей обученной модели какие-нибудь фразы не из датасета.
Фразы придумал сам и перевёл в Яндекс.Переводчике.

In [178]:
seq2seq.predict_by_phrase('je suis heureux de la fortune') # intended meaning: "I am happy about the good luck"

'i am happy the around this . EOS _ _'

In [181]:
seq2seq.predict_by_phrase('je vais aller nager') # intended meaning: "I am going to go swimming"

'i m going to go out day . EOS _'

In [183]:
seq2seq.predict_by_phrase('nous allons manger') # intended meaning: "we are going to eat"

'we re going to eat . EOS _ _ _'

По-моему, первые результаты получились неплохие, при том что модель по сути просто seq2seq с эмбеддингами.

Эту модель я строил по факту как бейзлайн, поэтому вот что хотел бы изменить и добавить:
 - добавить attention
 - взять другой датасет
 - добавить dropout
 - сделать из этой модели чат-бота: я предусмотрел в ней возможность использования одной матрицы эмбеддингов для энкодера и декодера. Здесь это неприменимо, так как словаря два разных, но в случае с чат-ботом язык будет один, и, соответственно, такая ситуация возможна; в результате матрица эмбеддингов должна будет обучаться быстрее и качественнее
 - сейчас у меня лосс считается по всей возможной длине последовательностей слов в батчах, без учёта разных длин предложений, и, хотя такой подход работает, я бы хотел это доработать

**Очень интересный результат получился на 36 эпохе при обучении:**

>\> c est un scientifique .<br /> 
= he is a mathematical genius . EOS _ _ _<br /> 
< he is a scientist .