In [1]:
from collections import Counter

import nltk
import NMT_Model
import nmt_data_utils
import nmt_model_utils

In [2]:
with open('news-commentary-v8.de-en.en',
          'r',
          encoding = 'utf-8') as f:
    en = f.readlines()
    
with open('news-commentary-v8.de-en.de',
          'r',
          encoding = 'utf-8') as f:
    de = f.readlines()

In [3]:
# first 5 sentence pairs. 
for line in zip(en[:5], de[:5]):
    print(line, '\n')

('SAN FRANCISCO – It has never been easy to have a rational conversation about the value of gold.\n', 'SAN FRANCISCO – Es war noch nie leicht, ein rationales Gespräch über den Wert von Gold zu führen.\n') 

('Lately, with gold prices up more than 300% over the last decade, it is harder than ever.\n', 'In letzter Zeit allerdings ist dies schwieriger denn je, ist doch der Goldpreis im letzten Jahrzehnt um über 300 Prozent angestiegen.\n') 

('Just last December, fellow economists Martin Feldstein and Nouriel Roubini each penned op-eds bravely questioning bullish market sentiment, sensibly pointing out gold’s risks.\n', 'Erst letzten Dezember verfassten meine Kollegen Martin Feldstein und Nouriel Roubini Kommentare, in denen sie mutig die vorherrschende optimistische Marktstimmung hinterfragten und sehr überlegt auf die Risiken des Goldes \xa0hinwiesen.\n') 

('Wouldn’t you know it?\n', 'Und es kam, wie es kommen musste.\n') 

('Since their articles appeared, the price of gold has moved u

In [4]:
# remove unnecessary new lines. 
de = [line.strip() for line in de]
en = [line.strip() for line in en]

In [5]:
# we will only use sentences of similar lengths in order to make training easier. 
len_en = [len(sent) for sent in en if 20 < len(sent) < 50]
len_dist = Counter(len_en).most_common()
len_dist

[(49, 599),
 (48, 599),
 (46, 583),
 (47, 547),
 (43, 514),
 (44, 512),
 (45, 511),
 (41, 509),
 (40, 503),
 (42, 490),
 (39, 477),
 (38, 443),
 (37, 438),
 (36, 421),
 (34, 412),
 (33, 365),
 (32, 358),
 (31, 353),
 (35, 346),
 (30, 326),
 (28, 324),
 (27, 273),
 (25, 260),
 (29, 254),
 (26, 250),
 (24, 233),
 (23, 232),
 (22, 214),
 (21, 208)]

In [6]:
# 11554 sentences that contain betwenn 20 and 50 words.
len(len_en)

11554

In [7]:
_de = []
_en = []
for sent_de, sent_en in zip(de, en):
    if 20 < len(sent_en) < 50:
        _de.append(sent_de)
        _en.append(sent_en)

In [13]:
# but we will not use all 150 000 sentences, only 5000 for the beginning.
text = _en[:2000]
language='english'
lower=True
words = []
tokenized_text = []

for line in text:
    tokenized = nltk.word_tokenize(line, language=language)
    if lower:
        tokenized = [word.lower() for word in tokenized]
    tokenized_text.append(tokenized)
    for word in tokenized:
        words.append(word)

most_common = Counter(words).most_common()
en_preprocessed = tokenized_text
en_most_common = most_common

text = _de[:2000]
language='german'
lower=True
words = []
tokenized_text = []

for line in text:
    tokenized = nltk.word_tokenize(line, language=language)
    if lower:
        tokenized = [word.lower() for word in tokenized]
    tokenized_text.append(tokenized)
    for word in tokenized:
        words.append(word)

most_common = Counter(words).most_common()
de_preprocessed = tokenized_text
de_most_common = most_common

en_preprocessed

[['wouldn', '’', 't', 'you', 'know', 'it', '?'],
 ['since',
  'then',
  ',',
  'the',
  'index',
  'has',
  'climbed',
  'above',
  '10,000',
  '.'],
 ['they',
  'departed',
  'pledging',
  'to',
  'revive',
  'europe',
  "'s",
  'growth',
  '.'],
 ['we', "'ve", 'heard', 'that', 'empty', 'promise', 'before', '.'],
 ['many', 'europeans', 'are', 'sick', 'of', 'british', 'vetoes', '.'],
 ['precedents', 'exist', 'for', 'this', '.'],
 ['nato', 'has', 'been', 'flexible', 'since', 'its', 'inception', '.'],
 ['these', 'precedents', 'can', 'be', 'extended', '.'],
 ['i', 'am', 'not', 'talking', 'about', 'a', 'two‑speed', 'europe', '.'],
 ['but', 'i', 'do', "n't", 'want', 'to', 'block', 'their', 'way', '.'],
 ['elected', 'parliaments', 'do', 'not', 'own', 'our', 'liberties', '.'],
 ['but', 'the', 'big', 'question', 'remains', ':', 'how', '?'],
 ['the', 'need', 'for', 'immediate', 'action', 'is', 'clear', '.'],
 ['the', 'new', 'year', 'is', 'looking', 'grim', '.'],
 ['france', 'is', 'flat-lining',

In [14]:
len(en_preprocessed), len(de_preprocessed)

(2000, 2000)

In [15]:
# for some of the sentences there is not german or english counterpart, i.e. only an empy array []
# therefore we will remove those sentence pairs.
en_preprocessed_clean, de_preprocessed_clean = [], []

for sent_en, sent_de in zip(en_preprocessed, de_preprocessed):
    if sent_en != [] and sent_de != []:
        en_preprocessed_clean.append(sent_en)
        de_preprocessed_clean.append(sent_de)
    else:
        continue

In [16]:
len(en_preprocessed_clean), len(de_preprocessed_clean)

(1992, 1992)

In [17]:
for e, d in zip(en_preprocessed_clean, de_preprocessed_clean[:5]):
    print('English:\n', e)
    print('German:\n', d, '\n'*3)

English:
 ['wouldn', '’', 't', 'you', 'know', 'it', '?']
German:
 ['und', 'es', 'kam', ',', 'wie', 'es', 'kommen', 'musste', '.'] 



English:
 ['since', 'then', ',', 'the', 'index', 'has', 'climbed', 'above', '10,000', '.']
German:
 ['seit', 'damals', 'ist', 'er', 'auf', 'über', '10.000', 'punkte', 'gestiegen', '.'] 



English:
 ['they', 'departed', 'pledging', 'to', 'revive', 'europe', "'s", 'growth', '.']
German:
 ['mit', 'der', 'zusicherung', ',', 'das', 'wachstum', 'in', 'europa', 'wieder', 'zu', 'beleben', ',', 'gingen', 'sie', 'auseinander', '.'] 



English:
 ['we', "'ve", 'heard', 'that', 'empty', 'promise', 'before', '.']
German:
 ['dieses', 'leere', 'versprechen', 'haben', 'wir', 'schon', 'einmal', 'gehört', '.'] 



English:
 ['many', 'europeans', 'are', 'sick', 'of', 'british', 'vetoes', '.']
German:
 ['viele', 'europäer', 'sind', 'die', 'britischen', 'vetos', 'leid', '.'] 





In [18]:
en_most_common[:15], len(en_most_common), len(de_most_common)

([('.', 1747),
  ('the', 648),
  ('is', 559),
  (',', 437),
  ('to', 297),
  ('this', 258),
  ('a', 239),
  ('but', 234),
  ('of', 213),
  ('not', 197),
  ('in', 193),
  ('are', 192),
  ('?', 184),
  ('it', 182),
  ('be', 161)],
 3174,
 7078)

In [19]:
# now we can create oyr lookup dicts for english and german, i.e. our vocab. 
# we will also include special tokens, later on used in the model. 
specials = ["<unk>", "<s>", "</s>", '<pad>']

en_word2ind, en_ind2word, en_vocab_size = nmt_data_utils.create_vocab(en_most_common, specials)
de_word2ind, de_ind2word, de_vocab_size = nmt_data_utils.create_vocab(de_most_common, specials)

In [20]:
# in order to feed the sentences to the network, we have to convert them to ints, corresponding to their indices
# in the lookup dicts. 
# we reverse the source language sentences, i.e. the english sentences as this alleviates learning for the seq2seq 
# model. Apart from this we also include EndOfSentence and StartOfSentence tags, which are needed as well. 
en_inds, en_unknowns = nmt_data_utils.convert_to_inds(en_preprocessed_clean, en_word2ind, reverse = True, eos = True)
de_inds, de_unknowns = nmt_data_utils.convert_to_inds(de_preprocessed_clean, de_word2ind, sos = True, eos = True)

In [21]:
[nmt_data_utils.convert_to_words(sentence, en_ind2word) for sentence in  en_inds[:2]]

[['?', 'it', 'know', 'you', 't', '’', 'wouldn', '</s>'],
 ['.',
  '10,000',
  'above',
  'climbed',
  'has',
  'index',
  'the',
  ',',
  'then',
  'since',
  '</s>']]

In [22]:
[nmt_data_utils.convert_to_words(sentence, de_ind2word) for sentence in  de_inds[:2]]

[['<s>',
  'und',
  'es',
  'kam',
  ',',
  'wie',
  'es',
  'kommen',
  'musste',
  '.',
  '</s>'],
 ['<s>',
  'seit',
  'damals',
  'ist',
  'er',
  'auf',
  'über',
  '10.000',
  'punkte',
  'gestiegen',
  '.',
  '</s>']]

In [24]:
# hyperparams. 
# those are probably not perfect, but work fine for now. 
num_layers_encoder = 4
num_layers_decoder = 4
rnn_size_encoder = 128
rnn_size_decoder = 128
embedding_dim = 300

batch_size = 64
epochs = 250
clip = 5
keep_probability = 0.8
learning_rate = 0.01
learning_rate_decay_steps = 1000
learning_rate_decay = 0.9

In [25]:
# create the graph and train the model. 
nmt_model_utils.reset_graph()

nmt = NMT_Model.NMT(en_word2ind,
                    en_ind2word,
                    de_word2ind,
                    de_ind2word,
                    './models/local_one/my_model',
                    'TRAIN',
                    embedding_dim = embedding_dim,
                    num_layers_encoder = num_layers_encoder,
                    num_layers_decoder = num_layers_decoder,
                    batch_size = batch_size,
                    clip = clip,
                    keep_probability = keep_probability,
                    learning_rate = learning_rate,
                    epochs = epochs,
                    rnn_size_encoder = rnn_size_encoder,
                    rnn_size_decoder = rnn_size_decoder, 
                    learning_rate_decay_steps = learning_rate_decay_steps,
                    learning_rate_decay = learning_rate_decay)

nmt.build_graph()
nmt.train(en_inds, de_inds)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Graph built.
-------------------- Epoch 0 of

Iteration: 10 of 31	train_loss: 5.7219
Iteration: 12 of 31	train_loss: 6.0272
Iteration: 14 of 31	train_loss: 5.8561
Iteration: 16 of 31	train_loss: 5.9408
Iteration: 18 of 31	train_loss: 5.7624
Iteration: 20 of 31	train_loss: 6.0337
Iteration: 22 of 31	train_loss: 6.1276
Iteration: 24 of 31	train_loss: 5.9860
Iteration: 26 of 31	train_loss: 5.9008
Iteration: 28 of 31	train_loss: 5.8000
Iteration: 30 of 31	train_loss: 5.5604
Iteration: 31 of 31	train_loss: 5.5847
Average Score for this Epoch: 5.818266868591309
--- new best score ---


-------------------- Epoch 8 of 250 --------------------
Iteration: 0 of 31	train_loss: 5.6353
Iteration: 2 of 31	train_loss: 5.5086
Iteration: 4 of 31	train_loss: 5.6072
Iteration: 6 of 31	train_loss: 5.6228
Iteration: 8 of 31	train_loss: 5.5265
Iteration: 10 of 31	train_loss: 5.6853
Iteration: 12 of 31	train_loss: 5.8621
Iteration: 14 of 31	train_loss: 5.8603
Iteration: 16 of 31	train_loss: 5.7057
Iteration: 18 of 31	train_loss: 5.5943
Iteration: 20 of 

Iteration: 26 of 31	train_loss: 5.7135
Iteration: 28 of 31	train_loss: 5.4353
Iteration: 30 of 31	train_loss: 5.2927
Iteration: 31 of 31	train_loss: 5.2502
Average Score for this Epoch: 5.330804824829102
--- new best score ---


-------------------- Epoch 18 of 250 --------------------
Iteration: 0 of 31	train_loss: 4.9200
Iteration: 2 of 31	train_loss: 5.3333
Iteration: 4 of 31	train_loss: 5.1435
Iteration: 6 of 31	train_loss: 5.1457
Iteration: 8 of 31	train_loss: 5.2991
Iteration: 10 of 31	train_loss: 5.2648
Iteration: 12 of 31	train_loss: 5.3982
Iteration: 14 of 31	train_loss: 5.3610
Iteration: 16 of 31	train_loss: 5.4843
Iteration: 18 of 31	train_loss: 5.1919
Iteration: 20 of 31	train_loss: 5.3650
Iteration: 22 of 31	train_loss: 5.2214
Iteration: 24 of 31	train_loss: 5.4227
Iteration: 26 of 31	train_loss: 5.5349
Iteration: 28 of 31	train_loss: 5.5520
Iteration: 30 of 31	train_loss: 5.4980
Iteration: 31 of 31	train_loss: 4.8598
Average Score for this Epoch: 5.28370475769043
--- new 

Iteration: 2 of 31	train_loss: 4.9335
Iteration: 4 of 31	train_loss: 4.6617
Iteration: 6 of 31	train_loss: 5.0531
Iteration: 8 of 31	train_loss: 4.6624
Iteration: 10 of 31	train_loss: 4.8061
Iteration: 12 of 31	train_loss: 4.7670
Iteration: 14 of 31	train_loss: 4.9676
Iteration: 16 of 31	train_loss: 4.7825
Iteration: 18 of 31	train_loss: 5.1335
Iteration: 20 of 31	train_loss: 5.0256
Iteration: 22 of 31	train_loss: 4.9514
Iteration: 24 of 31	train_loss: 4.9415
Iteration: 26 of 31	train_loss: 5.1721
Iteration: 28 of 31	train_loss: 4.8910
Iteration: 30 of 31	train_loss: 4.8337
Iteration: 31 of 31	train_loss: 4.7440
Average Score for this Epoch: 4.899945259094238
--- new best score ---


-------------------- Epoch 29 of 250 --------------------
Iteration: 0 of 31	train_loss: 4.8572
Iteration: 2 of 31	train_loss: 4.7470
Iteration: 4 of 31	train_loss: 4.9253
Iteration: 6 of 31	train_loss: 4.9061
Iteration: 8 of 31	train_loss: 4.9184
Iteration: 10 of 31	train_loss: 4.9205
Iteration: 12 of 31	

Iteration: 18 of 31	train_loss: 4.7710
Iteration: 20 of 31	train_loss: 4.3405
Iteration: 22 of 31	train_loss: 4.2803
Iteration: 24 of 31	train_loss: 4.5922
Iteration: 26 of 31	train_loss: 4.7974
Iteration: 28 of 31	train_loss: 4.2308
Iteration: 30 of 31	train_loss: 4.5362
Iteration: 31 of 31	train_loss: 4.5087
Average Score for this Epoch: 4.60654878616333
--- new best score ---


-------------------- Epoch 39 of 250 --------------------
Iteration: 0 of 31	train_loss: 4.3661
Iteration: 2 of 31	train_loss: 4.6990
Iteration: 4 of 31	train_loss: 4.5634
Iteration: 6 of 31	train_loss: 4.5986
Iteration: 8 of 31	train_loss: 4.6488
Iteration: 10 of 31	train_loss: 4.3265
Iteration: 12 of 31	train_loss: 4.6769
Iteration: 14 of 31	train_loss: 4.5632
Iteration: 16 of 31	train_loss: 4.7554
Iteration: 18 of 31	train_loss: 4.5293
Iteration: 20 of 31	train_loss: 4.5224
Iteration: 22 of 31	train_loss: 4.5265
Iteration: 24 of 31	train_loss: 4.5843
Iteration: 26 of 31	train_loss: 4.3471
Iteration: 28 of 

Average Score for this Epoch: 4.3755879402160645
--- new best score ---


-------------------- Epoch 49 of 250 --------------------
Iteration: 0 of 31	train_loss: 4.3773
Iteration: 2 of 31	train_loss: 4.4623
Iteration: 4 of 31	train_loss: 4.3776
Iteration: 6 of 31	train_loss: 4.5161
Iteration: 8 of 31	train_loss: 4.3613
Iteration: 10 of 31	train_loss: 4.3825
Iteration: 12 of 31	train_loss: 4.3119
Iteration: 14 of 31	train_loss: 4.5413
Iteration: 16 of 31	train_loss: 4.3134
Iteration: 18 of 31	train_loss: 4.4603
Iteration: 20 of 31	train_loss: 4.3006
Iteration: 22 of 31	train_loss: 4.4825
Iteration: 24 of 31	train_loss: 4.3493
Iteration: 26 of 31	train_loss: 4.5456
Iteration: 28 of 31	train_loss: 4.6673
Iteration: 30 of 31	train_loss: 4.6436
Iteration: 31 of 31	train_loss: 4.4943
Average Score for this Epoch: 4.377231121063232
-------------------- Epoch 50 of 250 --------------------
Iteration: 0 of 31	train_loss: 4.3277
Iteration: 2 of 31	train_loss: 3.9797
Iteration: 4 of 31	train_los

Iteration: 12 of 31	train_loss: 3.9910
Iteration: 14 of 31	train_loss: 4.1254
Iteration: 16 of 31	train_loss: 4.0114
Iteration: 18 of 31	train_loss: 4.0130
Iteration: 20 of 31	train_loss: 4.4124
Iteration: 22 of 31	train_loss: 4.6003
Iteration: 24 of 31	train_loss: 4.2489
Iteration: 26 of 31	train_loss: 4.4613
Iteration: 28 of 31	train_loss: 4.3267
Iteration: 30 of 31	train_loss: 4.4117
Iteration: 31 of 31	train_loss: 4.0361
Average Score for this Epoch: 4.209935665130615
--- new best score ---


-------------------- Epoch 60 of 250 --------------------
Iteration: 0 of 31	train_loss: 4.2504
Iteration: 2 of 31	train_loss: 4.2278
Iteration: 4 of 31	train_loss: 4.1567
Iteration: 6 of 31	train_loss: 4.2606
Iteration: 8 of 31	train_loss: 3.7245
Iteration: 10 of 31	train_loss: 4.0932
Iteration: 12 of 31	train_loss: 4.3253
Iteration: 14 of 31	train_loss: 4.2181
Iteration: 16 of 31	train_loss: 4.2680
Iteration: 18 of 31	train_loss: 4.1535
Iteration: 20 of 31	train_loss: 4.1515
Iteration: 22 of

Iteration: 30 of 31	train_loss: 4.3096
Iteration: 31 of 31	train_loss: 3.9857
Average Score for this Epoch: 4.064868450164795
--- new best score ---


-------------------- Epoch 70 of 250 --------------------
Iteration: 0 of 31	train_loss: 3.9546
Iteration: 2 of 31	train_loss: 3.8055
Iteration: 4 of 31	train_loss: 4.0710
Iteration: 6 of 31	train_loss: 4.2326
Iteration: 8 of 31	train_loss: 4.0506
Iteration: 10 of 31	train_loss: 4.1436
Iteration: 12 of 31	train_loss: 4.2062
Iteration: 14 of 31	train_loss: 4.1066
Iteration: 16 of 31	train_loss: 4.2589
Iteration: 18 of 31	train_loss: 4.2149
Iteration: 20 of 31	train_loss: 3.8193
Iteration: 22 of 31	train_loss: 3.8811
Iteration: 24 of 31	train_loss: 4.2277
Iteration: 26 of 31	train_loss: 4.2013
Iteration: 28 of 31	train_loss: 4.3819
Iteration: 30 of 31	train_loss: 4.2109
Iteration: 31 of 31	train_loss: 3.8879
Average Score for this Epoch: 4.051791191101074
--- new best score ---


-------------------- Epoch 71 of 250 --------------------
It

Iteration: 8 of 31	train_loss: 3.7286
Iteration: 10 of 31	train_loss: 4.4856
Iteration: 12 of 31	train_loss: 3.7781
Iteration: 14 of 31	train_loss: 4.0544
Iteration: 16 of 31	train_loss: 4.2873
Iteration: 18 of 31	train_loss: 3.8009
Iteration: 20 of 31	train_loss: 4.1563
Iteration: 22 of 31	train_loss: 3.9706
Iteration: 24 of 31	train_loss: 3.9253
Iteration: 26 of 31	train_loss: 3.8423
Iteration: 28 of 31	train_loss: 4.0097
Iteration: 30 of 31	train_loss: 3.5129
Iteration: 31 of 31	train_loss: 4.1651
Average Score for this Epoch: 3.9313101768493652
-------------------- Epoch 81 of 250 --------------------
Iteration: 0 of 31	train_loss: 4.0862
Iteration: 2 of 31	train_loss: 3.8923
Iteration: 4 of 31	train_loss: 3.9257
Iteration: 6 of 31	train_loss: 3.7570
Iteration: 8 of 31	train_loss: 3.7107
Iteration: 10 of 31	train_loss: 4.0345
Iteration: 12 of 31	train_loss: 3.5835
Iteration: 14 of 31	train_loss: 3.9851
Iteration: 16 of 31	train_loss: 3.8425
Iteration: 18 of 31	train_loss: 3.7782
It

Iteration: 28 of 31	train_loss: 4.1755
Iteration: 30 of 31	train_loss: 3.7240
Iteration: 31 of 31	train_loss: 3.9380
Average Score for this Epoch: 3.808171510696411
-------------------- Epoch 91 of 250 --------------------
Iteration: 0 of 31	train_loss: 3.9381
Iteration: 2 of 31	train_loss: 3.7275
Iteration: 4 of 31	train_loss: 3.6525
Iteration: 6 of 31	train_loss: 3.8947
Iteration: 8 of 31	train_loss: 3.3705
Iteration: 10 of 31	train_loss: 3.8419
Iteration: 12 of 31	train_loss: 3.5785
Iteration: 14 of 31	train_loss: 3.9299
Iteration: 16 of 31	train_loss: 3.6122
Iteration: 18 of 31	train_loss: 3.5925
Iteration: 20 of 31	train_loss: 4.0432
Iteration: 22 of 31	train_loss: 3.5710
Iteration: 24 of 31	train_loss: 3.8449
Iteration: 26 of 31	train_loss: 3.6932
Iteration: 28 of 31	train_loss: 3.7660
Iteration: 30 of 31	train_loss: 3.7891
Iteration: 31 of 31	train_loss: 3.9592
Average Score for this Epoch: 3.7908167839050293
-------------------- Epoch 92 of 250 --------------------
Iteration: 0

Iteration: 8 of 31	train_loss: 3.5468
Iteration: 10 of 31	train_loss: 3.4812
Iteration: 12 of 31	train_loss: 3.6080
Iteration: 14 of 31	train_loss: 3.5163
Iteration: 16 of 31	train_loss: 3.5078
Iteration: 18 of 31	train_loss: 3.8091
Iteration: 20 of 31	train_loss: 3.6009
Iteration: 22 of 31	train_loss: 3.7453
Iteration: 24 of 31	train_loss: 3.8936
Iteration: 26 of 31	train_loss: 3.2001
Iteration: 28 of 31	train_loss: 4.0571
Iteration: 30 of 31	train_loss: 3.8553
Iteration: 31 of 31	train_loss: 3.5584
Average Score for this Epoch: 3.678983688354492
-------------------- Epoch 102 of 250 --------------------
Iteration: 0 of 31	train_loss: 3.6453
Iteration: 2 of 31	train_loss: 3.3740
Iteration: 4 of 31	train_loss: 3.6386
Iteration: 6 of 31	train_loss: 3.8221
Iteration: 8 of 31	train_loss: 3.6487
Iteration: 10 of 31	train_loss: 3.8081
Iteration: 12 of 31	train_loss: 3.3995
Iteration: 14 of 31	train_loss: 3.4248
Iteration: 16 of 31	train_loss: 3.8698
Iteration: 18 of 31	train_loss: 3.6431
It

Iteration: 26 of 31	train_loss: 3.7665
Iteration: 28 of 31	train_loss: 3.5469
Iteration: 30 of 31	train_loss: 3.4679
Iteration: 31 of 31	train_loss: 3.6869
Average Score for this Epoch: 3.5652475357055664
--- new best score ---


-------------------- Epoch 112 of 250 --------------------
Iteration: 0 of 31	train_loss: 3.6091
Iteration: 2 of 31	train_loss: 3.7531
Iteration: 4 of 31	train_loss: 2.8903
Iteration: 6 of 31	train_loss: 3.2963
Iteration: 8 of 31	train_loss: 3.4703
Iteration: 10 of 31	train_loss: 3.5950
Iteration: 12 of 31	train_loss: 3.6878
Iteration: 14 of 31	train_loss: 3.4467
Iteration: 16 of 31	train_loss: 3.7447
Iteration: 18 of 31	train_loss: 3.6681
Iteration: 20 of 31	train_loss: 2.9416
Iteration: 22 of 31	train_loss: 3.9146
Iteration: 24 of 31	train_loss: 3.6993
Iteration: 26 of 31	train_loss: 3.2845
Iteration: 28 of 31	train_loss: 3.6623
Iteration: 30 of 31	train_loss: 3.8452
Iteration: 31 of 31	train_loss: 3.7633
Average Score for this Epoch: 3.5419023036956787
--- 

Iteration: 6 of 31	train_loss: 3.2535
Iteration: 8 of 31	train_loss: 3.3017
Iteration: 10 of 31	train_loss: 3.3270
Iteration: 12 of 31	train_loss: 3.3533
Iteration: 14 of 31	train_loss: 3.5987
Iteration: 16 of 31	train_loss: 3.4153
Iteration: 18 of 31	train_loss: 3.4176
Iteration: 20 of 31	train_loss: 3.5329
Iteration: 22 of 31	train_loss: 3.3298
Iteration: 24 of 31	train_loss: 3.3200
Iteration: 26 of 31	train_loss: 3.3345
Iteration: 28 of 31	train_loss: 3.8229
Iteration: 30 of 31	train_loss: 3.4875
Iteration: 31 of 31	train_loss: 3.3375
Average Score for this Epoch: 3.460562229156494
-------------------- Epoch 123 of 250 --------------------
Iteration: 0 of 31	train_loss: 3.2766
Iteration: 2 of 31	train_loss: 3.3868
Iteration: 4 of 31	train_loss: 3.3719
Iteration: 6 of 31	train_loss: 3.3916
Iteration: 8 of 31	train_loss: 3.5729
Iteration: 10 of 31	train_loss: 3.4235
Iteration: 12 of 31	train_loss: 3.5408
Iteration: 14 of 31	train_loss: 2.9507
Iteration: 16 of 31	train_loss: 3.4327
Ite

Iteration: 26 of 31	train_loss: 3.4653
Iteration: 28 of 31	train_loss: 3.2471
Iteration: 30 of 31	train_loss: 3.4539
Iteration: 31 of 31	train_loss: 3.1834
Average Score for this Epoch: 3.3530032634735107
-------------------- Epoch 133 of 250 --------------------
Iteration: 0 of 31	train_loss: 3.0409
Iteration: 2 of 31	train_loss: 3.1475
Iteration: 4 of 31	train_loss: 3.5874
Iteration: 6 of 31	train_loss: 3.5728
Iteration: 8 of 31	train_loss: 3.4634
Iteration: 10 of 31	train_loss: 3.0607
Iteration: 12 of 31	train_loss: 3.5283
Iteration: 14 of 31	train_loss: 3.2838
Iteration: 16 of 31	train_loss: 3.4891
Iteration: 18 of 31	train_loss: 3.2696
Iteration: 20 of 31	train_loss: 3.4100
Iteration: 22 of 31	train_loss: 3.4383
Iteration: 24 of 31	train_loss: 3.3526
Iteration: 26 of 31	train_loss: 2.9810
Iteration: 28 of 31	train_loss: 3.3255
Iteration: 30 of 31	train_loss: 3.2271
Iteration: 31 of 31	train_loss: 3.1301
Average Score for this Epoch: 3.345018148422241
--- new best score ---


-----

Iteration: 4 of 31	train_loss: 3.2003
Iteration: 6 of 31	train_loss: 3.4437
Iteration: 8 of 31	train_loss: 3.1847
Iteration: 10 of 31	train_loss: 3.4571
Iteration: 12 of 31	train_loss: 3.4635
Iteration: 14 of 31	train_loss: 3.3033
Iteration: 16 of 31	train_loss: 2.8316
Iteration: 18 of 31	train_loss: 3.3333
Iteration: 20 of 31	train_loss: 3.7303
Iteration: 22 of 31	train_loss: 3.1752
Iteration: 24 of 31	train_loss: 2.8488
Iteration: 26 of 31	train_loss: 3.2379
Iteration: 28 of 31	train_loss: 3.1599
Iteration: 30 of 31	train_loss: 3.4986
Iteration: 31 of 31	train_loss: 3.3110
Average Score for this Epoch: 3.2699410915374756
-------------------- Epoch 144 of 250 --------------------
Iteration: 0 of 31	train_loss: 3.6723
Iteration: 2 of 31	train_loss: 3.7481
Iteration: 4 of 31	train_loss: 3.0437
Iteration: 6 of 31	train_loss: 3.7148
Iteration: 8 of 31	train_loss: 3.6097
Iteration: 10 of 31	train_loss: 3.3231
Iteration: 12 of 31	train_loss: 3.0874
Iteration: 14 of 31	train_loss: 3.2179
Ite

Iteration: 28 of 31	train_loss: 3.2706
Iteration: 30 of 31	train_loss: 3.3574
Iteration: 31 of 31	train_loss: 3.0160
Average Score for this Epoch: 3.2060937881469727
--- new best score ---


-------------------- Epoch 154 of 250 --------------------
Iteration: 0 of 31	train_loss: 3.1571
Iteration: 2 of 31	train_loss: 2.9086
Iteration: 4 of 31	train_loss: 3.3121
Iteration: 6 of 31	train_loss: 3.4802
Iteration: 8 of 31	train_loss: 3.1795
Iteration: 10 of 31	train_loss: 3.1340
Iteration: 12 of 31	train_loss: 3.2642
Iteration: 14 of 31	train_loss: 3.1791
Iteration: 16 of 31	train_loss: 3.1107
Iteration: 18 of 31	train_loss: 3.5060
Iteration: 20 of 31	train_loss: 3.3175
Iteration: 22 of 31	train_loss: 3.3909
Iteration: 24 of 31	train_loss: 3.4361
Iteration: 26 of 31	train_loss: 3.0096
Iteration: 28 of 31	train_loss: 3.0686
Iteration: 30 of 31	train_loss: 3.0783
Iteration: 31 of 31	train_loss: 3.2535
Average Score for this Epoch: 3.221770763397217
-------------------- Epoch 155 of 250 ------

Iteration: 6 of 31	train_loss: 2.8778
Iteration: 8 of 31	train_loss: 3.2154
Iteration: 10 of 31	train_loss: 2.7457
Iteration: 12 of 31	train_loss: 3.0410
Iteration: 14 of 31	train_loss: 3.4536
Iteration: 16 of 31	train_loss: 3.3023
Iteration: 18 of 31	train_loss: 2.9877
Iteration: 20 of 31	train_loss: 2.8466
Iteration: 22 of 31	train_loss: 3.5514
Iteration: 24 of 31	train_loss: 3.2836
Iteration: 26 of 31	train_loss: 3.1375
Iteration: 28 of 31	train_loss: 3.4385
Iteration: 30 of 31	train_loss: 3.2326
Iteration: 31 of 31	train_loss: 2.9685
Average Score for this Epoch: 3.133108377456665
-------------------- Epoch 165 of 250 --------------------
Iteration: 0 of 31	train_loss: 2.7131
Iteration: 2 of 31	train_loss: 3.0864
Iteration: 4 of 31	train_loss: 3.2833
Iteration: 6 of 31	train_loss: 3.0623
Iteration: 8 of 31	train_loss: 2.9290
Iteration: 10 of 31	train_loss: 3.0385
Iteration: 12 of 31	train_loss: 2.8364
Iteration: 14 of 31	train_loss: 3.3185
Iteration: 16 of 31	train_loss: 3.0105
Ite

Iteration: 24 of 31	train_loss: 2.8081
Iteration: 26 of 31	train_loss: 3.0661
Iteration: 28 of 31	train_loss: 3.3345
Iteration: 30 of 31	train_loss: 3.6340
Iteration: 31 of 31	train_loss: 2.6641
Average Score for this Epoch: 3.045274257659912
--- new best score ---


-------------------- Epoch 175 of 250 --------------------
Iteration: 0 of 31	train_loss: 3.2281
Iteration: 2 of 31	train_loss: 2.7935
Iteration: 4 of 31	train_loss: 3.2249
Iteration: 6 of 31	train_loss: 2.8889
Iteration: 8 of 31	train_loss: 3.1904
Iteration: 10 of 31	train_loss: 2.8960
Iteration: 12 of 31	train_loss: 3.2220
Iteration: 14 of 31	train_loss: 2.9711
Iteration: 16 of 31	train_loss: 3.0462
Iteration: 18 of 31	train_loss: 3.0492
Iteration: 20 of 31	train_loss: 3.2907
Iteration: 22 of 31	train_loss: 3.1689
Iteration: 24 of 31	train_loss: 2.9848
Iteration: 26 of 31	train_loss: 2.7870
Iteration: 28 of 31	train_loss: 3.1848
Iteration: 30 of 31	train_loss: 3.1629
Iteration: 31 of 31	train_loss: 3.2523
Average Score f

Iteration: 4 of 31	train_loss: 3.0346
Iteration: 6 of 31	train_loss: 2.9183
Iteration: 8 of 31	train_loss: 3.2796
Iteration: 10 of 31	train_loss: 3.0556
Iteration: 12 of 31	train_loss: 2.8179
Iteration: 14 of 31	train_loss: 2.9093
Iteration: 16 of 31	train_loss: 3.2783
Iteration: 18 of 31	train_loss: 3.1261
Iteration: 20 of 31	train_loss: 2.7205
Iteration: 22 of 31	train_loss: 3.3126
Iteration: 24 of 31	train_loss: 3.1391
Iteration: 26 of 31	train_loss: 3.1148
Iteration: 28 of 31	train_loss: 3.2614
Iteration: 30 of 31	train_loss: 2.6614
Iteration: 31 of 31	train_loss: 3.1632
Average Score for this Epoch: 3.017617702484131
-------------------- Epoch 186 of 250 --------------------
Iteration: 0 of 31	train_loss: 2.9929
Iteration: 2 of 31	train_loss: 3.2920
Iteration: 4 of 31	train_loss: 3.0820
Iteration: 6 of 31	train_loss: 3.2081
Iteration: 8 of 31	train_loss: 2.7628
Iteration: 10 of 31	train_loss: 3.0526
Iteration: 12 of 31	train_loss: 2.9061
Iteration: 14 of 31	train_loss: 2.9315
Iter

Iteration: 26 of 31	train_loss: 3.0804
Iteration: 28 of 31	train_loss: 3.3045
Iteration: 30 of 31	train_loss: 2.6972
Iteration: 31 of 31	train_loss: 3.1585
Average Score for this Epoch: 2.9246912002563477
--- new best score ---


-------------------- Epoch 196 of 250 --------------------
Iteration: 0 of 31	train_loss: 2.7404
Iteration: 2 of 31	train_loss: 2.6485
Iteration: 4 of 31	train_loss: 3.2409
Iteration: 6 of 31	train_loss: 3.1054
Iteration: 8 of 31	train_loss: 2.8974
Iteration: 10 of 31	train_loss: 3.2988
Iteration: 12 of 31	train_loss: 2.7080
Iteration: 14 of 31	train_loss: 2.9831
Iteration: 16 of 31	train_loss: 2.8144
Iteration: 18 of 31	train_loss: 3.1641
Iteration: 20 of 31	train_loss: 2.9038
Iteration: 22 of 31	train_loss: 2.9090
Iteration: 24 of 31	train_loss: 3.0424
Iteration: 26 of 31	train_loss: 2.8853
Iteration: 28 of 31	train_loss: 2.9270
Iteration: 30 of 31	train_loss: 3.1627
Iteration: 31 of 31	train_loss: 2.6581
Average Score for this Epoch: 2.905926465988159
--- n

Iteration: 8 of 31	train_loss: 3.0000
Iteration: 10 of 31	train_loss: 2.6738
Iteration: 12 of 31	train_loss: 2.8780
Iteration: 14 of 31	train_loss: 2.7430
Iteration: 16 of 31	train_loss: 2.8124
Iteration: 18 of 31	train_loss: 2.9067
Iteration: 20 of 31	train_loss: 2.6741
Iteration: 22 of 31	train_loss: 2.8683
Iteration: 24 of 31	train_loss: 2.9898
Iteration: 26 of 31	train_loss: 2.8492
Iteration: 28 of 31	train_loss: 2.6453
Iteration: 30 of 31	train_loss: 2.9627
Iteration: 31 of 31	train_loss: 2.8574
Average Score for this Epoch: 2.8619353771209717
-------------------- Epoch 207 of 250 --------------------
Iteration: 0 of 31	train_loss: 2.6681
Iteration: 2 of 31	train_loss: 2.9544
Iteration: 4 of 31	train_loss: 2.6899
Iteration: 6 of 31	train_loss: 2.8495
Iteration: 8 of 31	train_loss: 2.8370
Iteration: 10 of 31	train_loss: 2.9551
Iteration: 12 of 31	train_loss: 3.2082
Iteration: 14 of 31	train_loss: 2.9321
Iteration: 16 of 31	train_loss: 2.7420
Iteration: 18 of 31	train_loss: 3.2196
I

Iteration: 31 of 31	train_loss: 2.8839
Average Score for this Epoch: 2.8109898567199707
--- new best score ---


-------------------- Epoch 217 of 250 --------------------
Iteration: 0 of 31	train_loss: 2.6283
Iteration: 2 of 31	train_loss: 2.8674
Iteration: 4 of 31	train_loss: 2.7718
Iteration: 6 of 31	train_loss: 2.9142
Iteration: 8 of 31	train_loss: 2.6746
Iteration: 10 of 31	train_loss: 2.8447
Iteration: 12 of 31	train_loss: 2.7644
Iteration: 14 of 31	train_loss: 2.9179
Iteration: 16 of 31	train_loss: 2.6310
Iteration: 18 of 31	train_loss: 2.3518
Iteration: 20 of 31	train_loss: 2.7694
Iteration: 22 of 31	train_loss: 2.9770
Iteration: 24 of 31	train_loss: 2.7849
Iteration: 26 of 31	train_loss: 3.0281
Iteration: 28 of 31	train_loss: 2.6567
Iteration: 30 of 31	train_loss: 3.2987
Iteration: 31 of 31	train_loss: 2.6085
Average Score for this Epoch: 2.8066070079803467
--- new best score ---


-------------------- Epoch 218 of 250 --------------------
Iteration: 0 of 31	train_loss: 2.7472

Iteration: 12 of 31	train_loss: 2.7788
Iteration: 14 of 31	train_loss: 2.9032
Iteration: 16 of 31	train_loss: 2.3090
Iteration: 18 of 31	train_loss: 2.6143
Iteration: 20 of 31	train_loss: 3.1404
Iteration: 22 of 31	train_loss: 3.1222
Iteration: 24 of 31	train_loss: 2.9038
Iteration: 26 of 31	train_loss: 2.9083
Iteration: 28 of 31	train_loss: 2.3768
Iteration: 30 of 31	train_loss: 3.0623
Iteration: 31 of 31	train_loss: 3.1443
Average Score for this Epoch: 2.7691264152526855
-------------------- Epoch 228 of 250 --------------------
Iteration: 0 of 31	train_loss: 2.7911
Iteration: 2 of 31	train_loss: 2.4763
Iteration: 4 of 31	train_loss: 2.9774
Iteration: 6 of 31	train_loss: 2.6044
Iteration: 8 of 31	train_loss: 3.3792
Iteration: 10 of 31	train_loss: 2.8654
Iteration: 12 of 31	train_loss: 2.9782
Iteration: 14 of 31	train_loss: 2.5521
Iteration: 16 of 31	train_loss: 2.5755
Iteration: 18 of 31	train_loss: 2.7904
Iteration: 20 of 31	train_loss: 2.9733
Iteration: 22 of 31	train_loss: 2.6554


--- new best score ---


-------------------- Epoch 238 of 250 --------------------
Iteration: 0 of 31	train_loss: 2.7188
Iteration: 2 of 31	train_loss: 2.9729
Iteration: 4 of 31	train_loss: 2.7262
Iteration: 6 of 31	train_loss: 2.6686
Iteration: 8 of 31	train_loss: 2.4059
Iteration: 10 of 31	train_loss: 2.5810
Iteration: 12 of 31	train_loss: 2.7285
Iteration: 14 of 31	train_loss: 2.8760
Iteration: 16 of 31	train_loss: 2.6848
Iteration: 18 of 31	train_loss: 3.0185
Iteration: 20 of 31	train_loss: 2.4801
Iteration: 22 of 31	train_loss: 2.3196
Iteration: 24 of 31	train_loss: 2.5233
Iteration: 26 of 31	train_loss: 2.9791
Iteration: 28 of 31	train_loss: 3.0662
Iteration: 30 of 31	train_loss: 2.9093
Iteration: 31 of 31	train_loss: 2.7772
Average Score for this Epoch: 2.710735321044922
-------------------- Epoch 239 of 250 --------------------
Iteration: 0 of 31	train_loss: 2.7305
Iteration: 2 of 31	train_loss: 2.5740
Iteration: 4 of 31	train_loss: 2.7395
Iteration: 6 of 31	train_loss: 2.5731

In [28]:
_de_inds, _de_unknowns = nmt_data_utils.convert_to_inds(de_preprocessed_clean, de_word2ind, sos = True,  eos = True)

In [29]:
# the inference model does not necessaryly need to get input batches. we can just give it. the whole input
# data, but the the batchsize has to be specified as the lenght of the input data.
nmt_model_utils.reset_graph()

nmt = NMT_Model.NMT(en_word2ind,
                    en_ind2word,
                    de_word2ind,
                    de_ind2word,
                    './models/local_one/my_model',
                    'INFER',
                    num_layers_encoder = num_layers_encoder,
                    num_layers_decoder = num_layers_decoder,
                    batch_size = len(en_inds[:50]),
                    keep_probability = 1.0,
                    learning_rate = 0.0,
                    beam_width = 0,
                    rnn_size_encoder = rnn_size_encoder,
                    rnn_size_decoder = rnn_size_decoder)

nmt.build_graph()
preds = nmt.infer(en_inds[:50], restore_path =  './models/local_one/my_model', targets = _de_inds[:50])

Graph built.
Restore graph from  ./models/local_one/my_model
INFO:tensorflow:Restoring parameters from ./models/local_one/my_model


In [30]:
# show some of the created translations
# Note: the way bleu score is probably not the perfect way to do it
nmt_model_utils.sample_results(preds, en_ind2word, de_ind2word, en_word2ind, de_word2ind, _de_inds[:50], en_inds[:50])




 ----------------------------------------------------------------------------------------------------
Actual Text:
wouldn ’ t you know it ?

Actual translation:
und es kam , wie es kommen musste .

Created translation:
was wie kam bis sie sie mit musste , allgemeine vorgibt

Bleu-score: 1.4147351699132998e-231



 ----------------------------------------------------------------------------------------------------
Actual Text:
since then , the index has climbed above 10,000 .

Actual translation:
seit damals ist er auf über 10.000 punkte gestiegen .

Created translation:
die damals wird es es schon 10.000 punkte gestiegen . 160 ,

Bleu-score: 0.22416933501922293



 ----------------------------------------------------------------------------------------------------
Actual Text:
they departed pledging to revive europe 's growth .

Actual translation:
mit der zusicherung , das wachstum in europa wieder zu beleben , gingen sie auseinander .

Created translation:
der eine zusicherung , ,

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
