In [1]:
from collections import Counter

import nltk
import NMT_Model
import nmt_data_utils
import nmt_model_utils

In [2]:
# Read both sides of the parallel corpus. Each list holds one raw line per
# entry, still including its trailing newline.
with open('news-commentary-v8.de-en.en', mode='r', encoding='utf-8') as f:
    en = list(f)

with open('news-commentary-v8.de-en.de', mode='r', encoding='utf-8') as f:
    de = list(f)

In [3]:
# Peek at the first five (English, German) line pairs.
for en_line, de_line in zip(en[:5], de[:5]):
    print((en_line, de_line), '\n')

('SAN FRANCISCO – It has never been easy to have a rational conversation about the value of gold.\n', 'SAN FRANCISCO – Es war noch nie leicht, ein rationales Gespräch über den Wert von Gold zu führen.\n') 

('Lately, with gold prices up more than 300% over the last decade, it is harder than ever.\n', 'In letzter Zeit allerdings ist dies schwieriger denn je, ist doch der Goldpreis im letzten Jahrzehnt um über 300 Prozent angestiegen.\n') 

('Just last December, fellow economists Martin Feldstein and Nouriel Roubini each penned op-eds bravely questioning bullish market sentiment, sensibly pointing out gold’s risks.\n', 'Erst letzten Dezember verfassten meine Kollegen Martin Feldstein und Nouriel Roubini Kommentare, in denen sie mutig die vorherrschende optimistische Marktstimmung hinterfragten und sehr überlegt auf die Risiken des Goldes \xa0hinwiesen.\n') 

('Wouldn’t you know it?\n', 'Und es kam, wie es kommen musste.\n') 

('Since their articles appeared, the price of gold has moved u

In [4]:
# Strip surrounding whitespace (notably the trailing newline) from every line.
en = list(map(str.strip, en))
de = list(map(str.strip, de))

In [5]:
# Only sentences of comparable size are used, to make training easier: keep
# lengths strictly between 20 and 50. len() of a string counts characters,
# so these are character lengths, not word counts.
len_en = [n_chars for n_chars in (len(sent) for sent in en) if 20 < n_chars < 50]
len_dist = Counter(len_en).most_common()
len_dist

[(49, 599),
 (48, 599),
 (46, 583),
 (47, 547),
 (43, 514),
 (44, 512),
 (45, 511),
 (41, 509),
 (40, 503),
 (42, 490),
 (39, 477),
 (38, 443),
 (37, 438),
 (36, 421),
 (34, 412),
 (33, 365),
 (32, 358),
 (31, 353),
 (35, 346),
 (30, 326),
 (28, 324),
 (27, 273),
 (25, 260),
 (29, 254),
 (26, 250),
 (24, 233),
 (23, 232),
 (22, 214),
 (21, 208)]

In [6]:
# 11554 sentences have a length strictly between 20 and 50 characters
# (len_en was built with len() on strings, so it counts characters, not words).
len(len_en)

11554

In [7]:
# Keep only the pairs whose English sentence is strictly between 20 and 50
# characters long; the German sentence is kept together with its partner.
kept_pairs = [
    (sent_de, sent_en)
    for sent_de, sent_en in zip(de, en)
    if 20 < len(sent_en) < 50
]
_de = [pair[0] for pair in kept_pairs]
_en = [pair[1] for pair in kept_pairs]

In [8]:
# Not all length-filtered sentence pairs are used for a first experiment:
# only the first NUM_SENTENCES of them are tokenized.
NUM_SENTENCES = 2000


def _tokenize_corpus(text, language, lower=True):
    """Tokenize sentences with NLTK and count how often each token occurs.

    Args:
        text: iterable of sentence strings.
        language: language name forwarded to ``nltk.word_tokenize``.
        lower: if True, every token is lower-cased.

    Returns:
        A ``(tokenized_text, most_common)`` tuple: the list of token lists
        (one per input sentence) and the ``Counter.most_common()`` list of
        ``(token, count)`` pairs over all tokens.
    """
    tokenized_text = []
    counts = Counter()
    for line in text:
        tokenized = nltk.word_tokenize(line, language=language)
        if lower:
            tokenized = [word.lower() for word in tokenized]
        tokenized_text.append(tokenized)
        # Updating sentence by sentence keeps first-occurrence order, so ties
        # in most_common() are ordered exactly as with one flat word list.
        counts.update(tokenized)
    return tokenized_text, counts.most_common()


en_preprocessed, en_most_common = _tokenize_corpus(_en[:NUM_SENTENCES], language='english')
de_preprocessed, de_most_common = _tokenize_corpus(_de[:NUM_SENTENCES], language='german')

en_preprocessed

[['wouldn', '’', 't', 'you', 'know', 'it', '?'],
 ['since',
  'then',
  ',',
  'the',
  'index',
  'has',
  'climbed',
  'above',
  '10,000',
  '.'],
 ['they',
  'departed',
  'pledging',
  'to',
  'revive',
  'europe',
  "'s",
  'growth',
  '.'],
 ['we', "'ve", 'heard', 'that', 'empty', 'promise', 'before', '.'],
 ['many', 'europeans', 'are', 'sick', 'of', 'british', 'vetoes', '.'],
 ['precedents', 'exist', 'for', 'this', '.'],
 ['nato', 'has', 'been', 'flexible', 'since', 'its', 'inception', '.'],
 ['these', 'precedents', 'can', 'be', 'extended', '.'],
 ['i', 'am', 'not', 'talking', 'about', 'a', 'two‑speed', 'europe', '.'],
 ['but', 'i', 'do', "n't", 'want', 'to', 'block', 'their', 'way', '.'],
 ['elected', 'parliaments', 'do', 'not', 'own', 'our', 'liberties', '.'],
 ['but', 'the', 'big', 'question', 'remains', ':', 'how', '?'],
 ['the', 'need', 'for', 'immediate', 'action', 'is', 'clear', '.'],
 ['the', 'new', 'year', 'is', 'looking', 'grim', '.'],
 ['france', 'is', 'flat-lining',

In [23]:
# Number of tokenized sentences per language: (English, German).
tuple(map(len, (en_preprocessed, de_preprocessed)))

(2000, 2000)

In [10]:
# Some sentences have no German or English counterpart, i.e. one side is just
# an empty list []. Those sentence pairs are removed from both sides.
nonempty_pairs = [
    (sent_en, sent_de)
    for sent_en, sent_de in zip(en_preprocessed, de_preprocessed)
    if sent_en != [] and sent_de != []
]
en_preprocessed_clean = [sent_en for sent_en, _ in nonempty_pairs]
de_preprocessed_clean = [sent_de for _, sent_de in nonempty_pairs]

In [11]:
# Number of sentence pairs left after removing the empty ones: (English, German).
tuple(map(len, (en_preprocessed_clean, de_preprocessed_clean)))

(1992, 1992)

In [12]:
# Show the first five cleaned sentence pairs, English above German.
for tokens_en, tokens_de in zip(en_preprocessed_clean[:5], de_preprocessed_clean[:5]):
    print('English:\n', tokens_en)
    print('German:\n', tokens_de, '\n'*3)

English:
 ['wouldn', '’', 't', 'you', 'know', 'it', '?']
German:
 ['und', 'es', 'kam', ',', 'wie', 'es', 'kommen', 'musste', '.'] 



English:
 ['since', 'then', ',', 'the', 'index', 'has', 'climbed', 'above', '10,000', '.']
German:
 ['seit', 'damals', 'ist', 'er', 'auf', 'über', '10.000', 'punkte', 'gestiegen', '.'] 



English:
 ['they', 'departed', 'pledging', 'to', 'revive', 'europe', "'s", 'growth', '.']
German:
 ['mit', 'der', 'zusicherung', ',', 'das', 'wachstum', 'in', 'europa', 'wieder', 'zu', 'beleben', ',', 'gingen', 'sie', 'auseinander', '.'] 



English:
 ['we', "'ve", 'heard', 'that', 'empty', 'promise', 'before', '.']
German:
 ['dieses', 'leere', 'versprechen', 'haben', 'wir', 'schon', 'einmal', 'gehört', '.'] 



English:
 ['many', 'europeans', 'are', 'sick', 'of', 'british', 'vetoes', '.']
German:
 ['viele', 'europäer', 'sind', 'die', 'britischen', 'vetos', 'leid', '.'] 





In [13]:
# The 15 most frequent English tokens, followed by the number of distinct
# tokens counted for English and for German.
top_en_tokens = en_most_common[:15]
(top_en_tokens, len(en_most_common), len(de_most_common))

([('.', 1747),
  ('the', 648),
  ('is', 559),
  (',', 437),
  ('to', 297),
  ('this', 258),
  ('a', 239),
  ('but', 234),
  ('of', 213),
  ('not', 197),
  ('in', 193),
  ('are', 192),
  ('?', 184),
  ('it', 182),
  ('be', 161)],
 3174,
 7078)

In [14]:
# Build the lookup dicts (i.e. the vocabularies) for English and German.
# The special tokens are added too; they are used by the model later on.
specials = ["<unk>", "<s>", "</s>", '<pad>']

en_vocab = nmt_data_utils.create_vocab(en_most_common, specials)
en_word2ind, en_ind2word, en_vocab_size = en_vocab

de_vocab = nmt_data_utils.create_vocab(de_most_common, specials)
de_word2ind, de_ind2word, de_vocab_size = de_vocab

In [15]:
# To feed the sentences to the network they are converted to ints, i.e. to
# their indices in the lookup dicts.
# The source (English) sentences are reversed, which is meant to make learning
# easier for the seq2seq model, and get an end-of-sentence tag appended.
# The target (German) sentences get both a start- and an end-of-sentence tag.
en_inds, en_unknowns = nmt_data_utils.convert_to_inds(
    en_preprocessed_clean,
    en_word2ind,
    reverse=True,
    eos=True,
)
de_inds, de_unknowns = nmt_data_utils.convert_to_inds(
    de_preprocessed_clean,
    de_word2ind,
    sos=True,
    eos=True,
)

In [16]:
# Sanity check: decode the first two encoded English sentences back to tokens.
[nmt_data_utils.convert_to_words(encoded, en_ind2word) for encoded in en_inds[:2]]

[['?', 'it', 'know', 'you', 't', '’', 'wouldn', '</s>'],
 ['.',
  '10,000',
  'above',
  'climbed',
  'has',
  'index',
  'the',
  ',',
  'then',
  'since',
  '</s>']]

In [17]:
# Sanity check: decode the first two encoded German sentences back to tokens.
[nmt_data_utils.convert_to_words(encoded, de_ind2word) for encoded in de_inds[:2]]

[['<s>',
  'und',
  'es',
  'kam',
  ',',
  'wie',
  'es',
  'kommen',
  'musste',
  '.',
  '</s>'],
 ['<s>',
  'seit',
  'damals',
  'ist',
  'er',
  'auf',
  'über',
  '10.000',
  'punkte',
  'gestiegen',
  '.',
  '</s>']]

In [18]:
# Hyperparameters.
# These are probably not optimal, but they work well enough for now.

# Model size: encoder and decoder use the same depth and width.
num_layers_encoder = num_layers_decoder = 4
rnn_size_encoder = rnn_size_decoder = 128
embedding_dim = 300

# Training settings.
batch_size, epochs = 64, 100
clip = 5
keep_probability = 0.8
learning_rate = 0.01
learning_rate_decay_steps, learning_rate_decay = 1000, 0.9

In [19]:
# Reset the graph, build the model in 'TRAIN' mode and train it on the
# encoded English (source) and German (target) sentences.
nmt_model_utils.reset_graph()

# Keyword options for the training model, all taken from the hyperparameters.
train_options = dict(
    embedding_dim=embedding_dim,
    num_layers_encoder=num_layers_encoder,
    num_layers_decoder=num_layers_decoder,
    batch_size=batch_size,
    clip=clip,
    keep_probability=keep_probability,
    learning_rate=learning_rate,
    epochs=epochs,
    rnn_size_encoder=rnn_size_encoder,
    rnn_size_decoder=rnn_size_decoder,
    learning_rate_decay_steps=learning_rate_decay_steps,
    learning_rate_decay=learning_rate_decay,
)

nmt = NMT_Model.NMT(
    en_word2ind,
    en_ind2word,
    de_word2ind,
    de_ind2word,
    './models/local_one/my_model',
    'TRAIN',
    **train_options,
)

nmt.build_graph()
nmt.train(en_inds, de_inds)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Graph built.
-------------------- Epoch 0 of

Iteration: 10 of 31	train_loss: 5.8944
Iteration: 12 of 31	train_loss: 6.2822
Iteration: 14 of 31	train_loss: 6.0512
Iteration: 16 of 31	train_loss: 6.1870
Iteration: 18 of 31	train_loss: 5.9481
Iteration: 20 of 31	train_loss: 6.2107
Iteration: 22 of 31	train_loss: 6.3617
Iteration: 24 of 31	train_loss: 6.2235
Iteration: 26 of 31	train_loss: 6.0859
Iteration: 28 of 31	train_loss: 5.9916
Iteration: 30 of 31	train_loss: 5.7031
Iteration: 31 of 31	train_loss: 5.8459
Average Score for this Epoch: 5.9936323165893555
--- new best score ---


-------------------- Epoch 8 of 100 --------------------
Iteration: 0 of 31	train_loss: 5.7087
Iteration: 2 of 31	train_loss: 5.6267
Iteration: 4 of 31	train_loss: 5.6880
Iteration: 6 of 31	train_loss: 5.7537
Iteration: 8 of 31	train_loss: 5.6504
Iteration: 10 of 31	train_loss: 5.8859
Iteration: 12 of 31	train_loss: 6.1064
Iteration: 14 of 31	train_loss: 6.0362
Iteration: 16 of 31	train_loss: 5.9480
Iteration: 18 of 31	train_loss: 5.7712
Iteration: 20 of

Iteration: 26 of 31	train_loss: 5.7433
Iteration: 28 of 31	train_loss: 5.4801
Iteration: 30 of 31	train_loss: 5.2917
Iteration: 31 of 31	train_loss: 5.2876
Average Score for this Epoch: 5.350543022155762
--- new best score ---


-------------------- Epoch 18 of 100 --------------------
Iteration: 0 of 31	train_loss: 4.9016
Iteration: 2 of 31	train_loss: 5.2784
Iteration: 4 of 31	train_loss: 5.1539
Iteration: 6 of 31	train_loss: 5.1099
Iteration: 8 of 31	train_loss: 5.3132
Iteration: 10 of 31	train_loss: 5.3005
Iteration: 12 of 31	train_loss: 5.4459
Iteration: 14 of 31	train_loss: 5.4224
Iteration: 16 of 31	train_loss: 5.5011
Iteration: 18 of 31	train_loss: 5.1750
Iteration: 20 of 31	train_loss: 5.3760
Iteration: 22 of 31	train_loss: 5.2476
Iteration: 24 of 31	train_loss: 5.4469
Iteration: 26 of 31	train_loss: 5.5463
Iteration: 28 of 31	train_loss: 5.6332
Iteration: 30 of 31	train_loss: 5.5181
Iteration: 31 of 31	train_loss: 4.8796
Average Score for this Epoch: 5.303221702575684
--- new

Iteration: 2 of 31	train_loss: 4.8663
Iteration: 4 of 31	train_loss: 4.5990
Iteration: 6 of 31	train_loss: 4.9610
Iteration: 8 of 31	train_loss: 4.5346
Iteration: 10 of 31	train_loss: 4.6825
Iteration: 12 of 31	train_loss: 4.6003
Iteration: 14 of 31	train_loss: 4.8936
Iteration: 16 of 31	train_loss: 4.7054
Iteration: 18 of 31	train_loss: 5.0219
Iteration: 20 of 31	train_loss: 4.9747
Iteration: 22 of 31	train_loss: 4.8350
Iteration: 24 of 31	train_loss: 4.8210
Iteration: 26 of 31	train_loss: 5.0209
Iteration: 28 of 31	train_loss: 4.8098
Iteration: 30 of 31	train_loss: 4.7396
Iteration: 31 of 31	train_loss: 4.6432
Average Score for this Epoch: 4.807560443878174
--- new best score ---


-------------------- Epoch 29 of 100 --------------------
Iteration: 0 of 31	train_loss: 4.7558
Iteration: 2 of 31	train_loss: 4.5991
Iteration: 4 of 31	train_loss: 4.8351
Iteration: 6 of 31	train_loss: 4.7847
Iteration: 8 of 31	train_loss: 4.8336
Iteration: 10 of 31	train_loss: 4.7530
Iteration: 12 of 31	

Iteration: 18 of 31	train_loss: 4.6704
Iteration: 20 of 31	train_loss: 4.2844
Iteration: 22 of 31	train_loss: 4.2062
Iteration: 24 of 31	train_loss: 4.4843
Iteration: 26 of 31	train_loss: 4.6055
Iteration: 28 of 31	train_loss: 4.0798
Iteration: 30 of 31	train_loss: 4.4701
Iteration: 31 of 31	train_loss: 4.3565
Average Score for this Epoch: 4.469448089599609
--- new best score ---


-------------------- Epoch 39 of 100 --------------------
Iteration: 0 of 31	train_loss: 4.1995
Iteration: 2 of 31	train_loss: 4.5546
Iteration: 4 of 31	train_loss: 4.5060
Iteration: 6 of 31	train_loss: 4.5001
Iteration: 8 of 31	train_loss: 4.5099
Iteration: 10 of 31	train_loss: 4.2878
Iteration: 12 of 31	train_loss: 4.5528
Iteration: 14 of 31	train_loss: 4.3846
Iteration: 16 of 31	train_loss: 4.6675
Iteration: 18 of 31	train_loss: 4.3503
Iteration: 20 of 31	train_loss: 4.3803
Iteration: 22 of 31	train_loss: 4.4238
Iteration: 24 of 31	train_loss: 4.4643
Iteration: 26 of 31	train_loss: 4.2587
Iteration: 28 of

Average Score for this Epoch: 4.243043899536133
--- new best score ---


-------------------- Epoch 49 of 100 --------------------
Iteration: 0 of 31	train_loss: 4.2493
Iteration: 2 of 31	train_loss: 4.2360
Iteration: 4 of 31	train_loss: 4.2471
Iteration: 6 of 31	train_loss: 4.3872
Iteration: 8 of 31	train_loss: 4.1885
Iteration: 10 of 31	train_loss: 4.2572
Iteration: 12 of 31	train_loss: 4.2215
Iteration: 14 of 31	train_loss: 4.4021
Iteration: 16 of 31	train_loss: 4.1007
Iteration: 18 of 31	train_loss: 4.4065
Iteration: 20 of 31	train_loss: 4.1639
Iteration: 22 of 31	train_loss: 4.2742
Iteration: 24 of 31	train_loss: 4.3357
Iteration: 26 of 31	train_loss: 4.3956
Iteration: 28 of 31	train_loss: 4.5420
Iteration: 30 of 31	train_loss: 4.4759
Iteration: 31 of 31	train_loss: 4.4342
Average Score for this Epoch: 4.239203453063965
--- new best score ---


-------------------- Epoch 50 of 100 --------------------
Iteration: 0 of 31	train_loss: 4.1877
Iteration: 2 of 31	train_loss: 3.7931
Iter

Iteration: 12 of 31	train_loss: 3.7841
Iteration: 14 of 31	train_loss: 4.0259
Iteration: 16 of 31	train_loss: 3.8322
Iteration: 18 of 31	train_loss: 3.8932
Iteration: 20 of 31	train_loss: 4.2834
Iteration: 22 of 31	train_loss: 4.4109
Iteration: 24 of 31	train_loss: 4.1022
Iteration: 26 of 31	train_loss: 4.2855
Iteration: 28 of 31	train_loss: 4.2233
Iteration: 30 of 31	train_loss: 4.3012
Iteration: 31 of 31	train_loss: 3.9190
Average Score for this Epoch: 4.073274612426758
--- new best score ---


-------------------- Epoch 60 of 100 --------------------
Iteration: 0 of 31	train_loss: 4.0809
Iteration: 2 of 31	train_loss: 4.1236
Iteration: 4 of 31	train_loss: 4.0400
Iteration: 6 of 31	train_loss: 4.0632
Iteration: 8 of 31	train_loss: 3.6068
Iteration: 10 of 31	train_loss: 3.9756
Iteration: 12 of 31	train_loss: 4.1361
Iteration: 14 of 31	train_loss: 4.1054
Iteration: 16 of 31	train_loss: 4.0166
Iteration: 18 of 31	train_loss: 4.0122
Iteration: 20 of 31	train_loss: 4.0285
Iteration: 22 of

Iteration: 30 of 31	train_loss: 4.1069
Iteration: 31 of 31	train_loss: 3.8402
Average Score for this Epoch: 3.9165091514587402
-------------------- Epoch 70 of 100 --------------------
Iteration: 0 of 31	train_loss: 3.8175
Iteration: 2 of 31	train_loss: 3.5929
Iteration: 4 of 31	train_loss: 4.0194
Iteration: 6 of 31	train_loss: 4.0650
Iteration: 8 of 31	train_loss: 3.8194
Iteration: 10 of 31	train_loss: 4.0210
Iteration: 12 of 31	train_loss: 4.0793
Iteration: 14 of 31	train_loss: 3.9555
Iteration: 16 of 31	train_loss: 4.1567
Iteration: 18 of 31	train_loss: 3.9273
Iteration: 20 of 31	train_loss: 3.6919
Iteration: 22 of 31	train_loss: 3.7305
Iteration: 24 of 31	train_loss: 4.0024
Iteration: 26 of 31	train_loss: 4.1357
Iteration: 28 of 31	train_loss: 4.2905
Iteration: 30 of 31	train_loss: 4.0294
Iteration: 31 of 31	train_loss: 3.7224
Average Score for this Epoch: 3.8986971378326416
--- new best score ---


-------------------- Epoch 71 of 100 --------------------
Iteration: 0 of 31	train_

Iteration: 8 of 31	train_loss: 3.5977
Iteration: 10 of 31	train_loss: 4.2889
Iteration: 12 of 31	train_loss: 3.6365
Iteration: 14 of 31	train_loss: 3.8527
Iteration: 16 of 31	train_loss: 4.0958
Iteration: 18 of 31	train_loss: 3.6342
Iteration: 20 of 31	train_loss: 3.9314
Iteration: 22 of 31	train_loss: 3.9064
Iteration: 24 of 31	train_loss: 3.7854
Iteration: 26 of 31	train_loss: 3.6132
Iteration: 28 of 31	train_loss: 3.8635
Iteration: 30 of 31	train_loss: 3.4500
Iteration: 31 of 31	train_loss: 4.0610
Average Score for this Epoch: 3.786715030670166
--- new best score ---


-------------------- Epoch 81 of 100 --------------------
Iteration: 0 of 31	train_loss: 3.8586
Iteration: 2 of 31	train_loss: 3.7902
Iteration: 4 of 31	train_loss: 3.7938
Iteration: 6 of 31	train_loss: 3.6610
Iteration: 8 of 31	train_loss: 3.5995
Iteration: 10 of 31	train_loss: 3.8946
Iteration: 12 of 31	train_loss: 3.4576
Iteration: 14 of 31	train_loss: 3.8333
Iteration: 16 of 31	train_loss: 3.7856
Iteration: 18 of 

Iteration: 26 of 31	train_loss: 3.8442
Iteration: 28 of 31	train_loss: 4.1659
Iteration: 30 of 31	train_loss: 3.5466
Iteration: 31 of 31	train_loss: 3.8105
Average Score for this Epoch: 3.698990821838379
-------------------- Epoch 91 of 100 --------------------
Iteration: 0 of 31	train_loss: 3.8428
Iteration: 2 of 31	train_loss: 3.6497
Iteration: 4 of 31	train_loss: 3.5880
Iteration: 6 of 31	train_loss: 3.6952
Iteration: 8 of 31	train_loss: 3.2906
Iteration: 10 of 31	train_loss: 3.7381
Iteration: 12 of 31	train_loss: 3.4003
Iteration: 14 of 31	train_loss: 3.7956
Iteration: 16 of 31	train_loss: 3.4523
Iteration: 18 of 31	train_loss: 3.5400
Iteration: 20 of 31	train_loss: 3.9086
Iteration: 22 of 31	train_loss: 3.5612
Iteration: 24 of 31	train_loss: 3.7205
Iteration: 26 of 31	train_loss: 3.5926
Iteration: 28 of 31	train_loss: 3.6762
Iteration: 30 of 31	train_loss: 3.6751
Iteration: 31 of 31	train_loss: 3.8712
Average Score for this Epoch: 3.6829233169555664
--- new best score ---


------

In [20]:
# Encode the cleaned German sentences with start- and end-of-sentence tags;
# slices of _de_inds are passed as targets in the cells that follow.
# NOTE(review): the arguments are the same as for the earlier de_inds call;
# presumably recomputed to get a fresh copy after training — confirm.
_de_inds, _de_unknowns = nmt_data_utils.convert_to_inds(
    de_preprocessed_clean,
    de_word2ind,
    sos=True,
    eos=True,
)

In [21]:
# The inference model does not necessarily need its input in batches: the
# whole input can be passed at once, but then the batch size has to be set to
# the length of the input data.
nmt_model_utils.reset_graph()

# Number of sentences to translate, and the checkpoint path that the training
# cell passed to the model.
num_infer = 50
model_path = './models/local_one/my_model'
infer_inputs = en_inds[:num_infer]

nmt = NMT_Model.NMT(en_word2ind,
                    en_ind2word,
                    de_word2ind,
                    de_ind2word,
                    model_path,
                    'INFER',
                    # Pass the embedding size explicitly, as the training cell
                    # does, so the rebuilt graph matches the trained one
                    # instead of depending on the class default.
                    embedding_dim = embedding_dim,
                    num_layers_encoder = num_layers_encoder,
                    num_layers_decoder = num_layers_decoder,
                    batch_size = len(infer_inputs),
                    keep_probability = 1.0,
                    learning_rate = 0.0,
                    beam_width = 0,
                    rnn_size_encoder = rnn_size_encoder,
                    rnn_size_decoder = rnn_size_decoder)

nmt.build_graph()
preds = nmt.infer(infer_inputs, restore_path = model_path, targets = _de_inds[:num_infer])

Graph built.
Restore graph from  ./models/local_one/my_model
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./models/local_one/my_model


In [22]:
# Show some of the generated translations next to the source text and the
# reference translation.
# Note: the way the BLEU score is computed here is probably not ideal.
sample_targets = _de_inds[:50]
sample_sources = en_inds[:50]
nmt_model_utils.sample_results(
    preds,
    en_ind2word,
    de_ind2word,
    en_word2ind,
    de_word2ind,
    sample_targets,
    sample_sources,
)




 ----------------------------------------------------------------------------------------------------
Actual Text:
wouldn ’ t you know it ?

Actual translation:
und es kam , wie es kommen musste .

Created translation:
denn kalte , , , es es musste . . ,

Bleu-score: 6.887578243315168e-155



 ----------------------------------------------------------------------------------------------------
Actual Text:
since then , the index has climbed above 10,000 .

Actual translation:
seit damals ist er auf über 10.000 punkte gestiegen .

Created translation:
die damals ist ist sarajevo noch 10.000 punkte gestiegen .

Bleu-score: 0.31239399369202553



 ----------------------------------------------------------------------------------------------------
Actual Text:
they departed pledging to revive europe 's growth .

Actual translation:
mit der zusicherung , das wachstum in europa wieder zu beleben , gingen sie auseinander .

Created translation:
aber ultraniedrige zusicherung zusicherung das

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
