In [1]:
import os
import pandas as pd
import numpy as np
import collections
import tensorflow as tf
import pickle

In [2]:
number_of_iteration=3
models_folder_name_test = os.path.join(os.getcwd(),'models','test',str(number_of_iteration))
models_folder_name_train = os.path.join(os.getcwd(),'models','train',str(number_of_iteration))
summaries_folder_name = os.path.join(os.getcwd(),'summaries','test', str(number_of_iteration))

path_to_preprocessed_texts_test = os.path.join(models_folder_name_test,'recipes_test_dataset.pkl')
path_to_preprocessed_texts_train = os.path.join(models_folder_name_train,'recipes_train_dataset.pkl')

df_preprocessed_texts_test = pd.read_pickle(path_to_preprocessed_texts_test)
df_preprocessed_texts_train = pd.read_pickle(path_to_preprocessed_texts_train)
df_preprocessed_texts_all=pd.concat([df_preprocessed_texts_test,df_preprocessed_texts_train]).sort_index()
words_to_ints = pd.read_pickle(os.path.join(models_folder_name_train, 'doc2vec_recipes_dict_words_integers.pkl'))

preprocessed_texts_test = df_preprocessed_texts_test.preprocessed_texts.values.tolist()
preprocessed_texts_train = df_preprocessed_texts_train.preprocessed_texts.values.tolist()
preprocessed_texts_all = df_preprocessed_texts_all.preprocessed_texts.values.tolist()
labels = df_preprocessed_texts_train['labels'].values.tolist()

unique_labels=sorted(set(labels))
number_categories=len(unique_labels)
categories_indices=np.linspace(0,number_categories-1,number_categories,dtype=int)
labels2integers=dict(zip(unique_labels,categories_indices))

print(labels2integers)
df_preprocessed_texts_all

{'steak': 1, 'sashimi': 0, 'tiramisu': 3, 'sushi': 2}


Unnamed: 0,labels,number_of_important_words,preprocessed_texts,text_names
0,sashimi,124,"[salmon, avocado, onion, wasabi, seed, rice, n...",1.txt
1,sashimi,10,"[tuna, wasabi, soy, sauce, ginger, grain, posi...",10.txt
2,sashimi,56,"[sashimi, salmon, filet, sesame, seed, oil, se...",2.txt
3,sashimi,125,"[sushi, filet, block, sushi, filet, block, cor...",3.txt
4,sashimi,57,"[coriander, leaf, sesame, seed, oil, sushi, sh...",4.txt
5,sashimi,64,"[sushi, filet, grain, water, salt, sugar, oil,...",5.txt
6,sashimi,19,"[sushi, coriander, tuna, sashimi, piece, tuna,...",6.txt
7,sashimi,26,"[tuna, avocado, cut, slice, oil, lime, juice, ...",7.txt
8,sashimi,57,"[tuna, piece, piece, ginger, soy, sauce, sauce...",8.txt
9,sashimi,42,"[sushi, rice, rice, vinegar, vinegar, sugar, s...",9.txt


In [3]:
batch_size = 8
generations = 75000
model_learning_rate = 0.0005

embedding_size = 24   #word embedding size
doc_embedding_size = 12  #document embedding size
concatenated_size = embedding_size + doc_embedding_size

save_embeddings_every = 5000
print_valid_every = 5000
print_loss_every = 50

In [4]:
#replace each word in texts with integer value
def text_to_numbers(preprocessed_texts, word_dict):
    data = []
    for prepr_text in preprocessed_texts:
        text_data = []
        for word in prepr_text:
            if word in word_dict:
                word_ix = word_dict[word]
                text_data.append(word_ix)
            #else:
            #    word_ix = 0
            #text_data.append(word_ix)
        data.append(text_data)
    return (data)


def create_batch_data(text_with_words_conv_to_numbers, batch_size=batch_size):
    batch_data = []
    label_data = []
    
    rand_text_ix = int(np.random.choice(len(text_with_words_conv_to_numbers), size=1))
    rand_text = text_with_words_conv_to_numbers[rand_text_ix]
    word_to_predict_label=np.random.choice(list(set(rand_text)), size=1,replace=False)
    
    while len(batch_data) < batch_size:
        item_in_batch=[]        
        
        label_words=np.random.choice(rand_text, size=1,replace=False)

        item_in_batch.extend(word_to_predict_label)
        item_in_batch.append(rand_text_ix)     
        label_data.extend(label_words)
        batch_data.append(item_in_batch)

        
    batch_data = np.array(batch_data)
    label_data = np.transpose(np.array(label_data))

    return (batch_data, label_data)

In [5]:
word_dictionary=words_to_ints
vocabulary_size=len(word_dictionary)
print(word_dictionary)
print(vocabulary_size)

word_dictionary_rev = dict(zip(word_dictionary.values(), word_dictionary.keys()))

{'mixer': 66, 'saucepan': 93, 'bottom': 13, 'metal': 65, 'sirloin': 99, 'white': 136, 'sieve': 98, 'cone': 24, 'curl': 30, 'sugar': 114, 'mascarpone': 59, 'leaf': 54, 'seed': 95, 'bit': 10, 'medium': 64, 'confectioner': 25, 'tomato': 121, 'slice': 101, 'tongs': 122, 'surface': 115, 'thickness': 118, 'garnish': 44, 'grill': 48, 'roe': 87, 'position': 79, 'bamboo': 6, 'tempura': 117, 'chive': 19, 'truffle': 126, 'strawberry': 112, 'fryer': 43, 'marinade': 58, 'preheat': 82, 'tuna': 127, 'water': 133, 'egg': 36, 'sauce': 92, 'rice': 86, 'vinegar': 130, 'butter': 15, 'towel': 125, 'coriander': 26, 'part': 74, 'pepper': 76, 'quantity': 85, 'steak': 110, 'mat': 60, 'roll': 88, 'wasabi': 132, 'wafer': 131, 'drain': 35, 'cream': 28, 'angle': 1, 'lime': 56, 'salmon': 89, 'peak': 75, 'liqueur': 57, 'tobikko': 120, 'sea': 94, 'batter': 7, 'bag': 4, 'powder': 81, 'press': 83, 'paper': 73, 'starch': 109, 'whisk': 135, 'nori': 70, 'guacamole': 50, 'juice': 51, 'torch': 124, 'mushroom': 68, 'space': 

In [6]:
text_data_test = text_to_numbers(preprocessed_texts_test, word_dictionary)
text_data_train = text_to_numbers(preprocessed_texts_train, word_dictionary)
print(np.shape(text_data_test))
print(np.shape(text_data_train))
text_data = []
text_data.extend(text_data_train)
text_data.extend(text_data_test)
print(np.shape(text_data))
print(text_data)

#print(text_data_test)
#print(text_data_train)

(9,)
(36,)
(45,)
[[89, 3, 72, 132, 95, 86, 69, 77, 89, 90, 89, 133, 41, 50, 72, 121, 90, 90, 90, 90, 90, 90, 90, 90, 31, 3, 3, 3, 3, 77, 72, 67, 3, 67, 50, 88, 56, 51, 56, 56, 51, 56, 51, 3, 51, 56, 50, 67, 132, 132, 10, 132, 50, 10, 10, 96, 95, 102, 92, 121, 121, 121, 3, 26, 54, 54, 67, 26, 50, 21, 89, 90, 89, 89, 41, 91, 31, 101, 21, 10, 109, 21, 101, 89, 109, 21, 109, 89, 89, 109, 89, 89, 89, 43, 101, 43, 89, 71, 89, 89, 111, 43, 89, 89, 89, 89, 89, 89, 43, 71, 89, 71, 89, 44, 89, 86, 69, 69, 89, 50, 26, 86, 69, 50], [127, 132, 102, 92, 45, 46, 79, 40, 46, 101], [91, 89, 38, 96, 95, 71, 96, 95, 92, 45, 38, 96, 95, 71, 115, 96, 95, 71, 40, 89, 38, 96, 95, 115, 40, 38, 38, 41, 96, 95, 71, 111, 38, 38, 41, 79, 38, 40, 38, 38, 31, 38, 92, 115, 11, 89, 101, 40, 79, 89, 92, 45, 89, 91, 101, 92], [116, 38, 12, 116, 38, 12, 26, 54, 96, 95, 71, 127, 127, 38, 101, 101, 38, 31, 127, 101, 12, 12, 127, 95, 71, 40, 71, 127, 26, 38, 115, 38, 12, 11, 41, 127, 12, 41, 41, 79, 38, 12, 1, 31, 1, 11, 1

In [7]:
valid_words = ['tuna', 'rice', 'sushi', 'roll', 'sashimi','steak','grill', 'sauce', 'cream']

valid_examples = [word_dictionary[x] for x in valid_words if x in word_dictionary.keys()]
print(valid_examples)

[127, 86, 116, 88, 91, 110, 48, 92, 28]


In [8]:
batch_data, label_data = create_batch_data(preprocessed_texts_all)
print(batch_data)
print(label_data)
print(np.shape(label_data))

[['ladyfinger' '42']
 ['ladyfinger' '42']
 ['ladyfinger' '42']
 ['ladyfinger' '42']
 ['ladyfinger' '42']
 ['ladyfinger' '42']
 ['ladyfinger' '42']
 ['ladyfinger' '42']]
['strawberry' 'mixer' 'powder' 'beat' 'spread' 'cream' 'speed' 'liqueur']
(8,)


In [9]:
tf.reset_default_graph()
embeddings = tf.get_variable("embeddings", shape=[vocabulary_size, embedding_size], trainable=False)
doc_embeddings = tf.get_variable("doc_embeddings", shape=[len(preprocessed_texts_train), doc_embedding_size])
decoder_weights = tf.get_variable("decoder_weights", shape=[vocabulary_size, concatenated_size], trainable=False)
decoder_biases = tf.get_variable("decoder_biases", shape=[vocabulary_size], trainable=False)
print(embeddings.trainable)
restorer = tf.train.Saver(name="restoring")


False


In [10]:
sess = tf.InteractiveSession()

In [11]:
restorer.restore(sess, os.path.join(models_folder_name_train,"doc2vec_recipes_checkpoint.ckpt"))
print("Model restored.")
# Check the values of the variables
print("embeddings : %s" % embeddings.eval())
print("doc_embeddings : %s" % doc_embeddings.eval())
print("decoder_weights : %s" % decoder_weights.eval())
print("decoder_biases : %s" % decoder_biases.eval())

INFO:tensorflow:Restoring parameters from /notebooks/school/text_feature_extraction/models/train/3/doc2vec_recipes_checkpoint.ckpt
Model restored.
embeddings : [[-0.19125338 -1.185797   -0.35253575 ...  0.08186969 -0.27173537
  -0.05202634]
 [ 0.49646756 -1.1386368   0.7941046  ... -0.29535997  0.4415489
  -0.42029768]
 [-0.22137743 -0.7155987   0.21770948 ... -0.64438117 -0.6595773
  -0.90182513]
 ...
 [-0.8538858  -0.17478627 -0.42274722 ... -0.24652176  0.4458565
  -0.49719742]
 [-0.84930944  0.6311769  -0.5172057  ... -0.6077494  -0.08328934
   0.48432577]
 [-0.14234668 -0.33095706 -0.19301212 ...  0.12308745  0.7391928
  -1.2279626 ]]
doc_embeddings : [[ 2.22474551e+00 -8.01887572e-01 -4.68756795e-01  1.49313843e+00
   5.43751299e-01 -1.54211986e+00  4.62474853e-01  1.96082962e+00
  -4.31935847e-01  2.14764690e+00 -1.47893679e+00 -1.28303778e+00]
 [-9.51134861e-01  1.78688809e-01  4.00638390e+00  1.24470305e+00
  -3.79157901e-01 -1.49972308e+00  2.43053007e+00 -1.67741954e+00
  -3

In [12]:
#variables_names = [v.name for v in tf.trainable_variables()]
#values = sess.run(variables_names)
#for k, v in zip(variables_names, values):
#    print("Variable: ", k)
#    print("Shape: ", v.shape)
#    print(v)

In [13]:
print('Creating Model')

#doc_embeddings_test = tf.Variable(tf.random_uniform([len(preprocessed_texts_test), doc_embedding_size], -1.0, 1.0), name="doc_embeddings_test")
#sess.run(tf.variables_initializer([doc_embeddings_test]))
doc_embeddings_test_initial_tensor = tf.random_uniform([len(preprocessed_texts_test), doc_embedding_size], -1.0, 1.0)
#doc_embeddings_data = tf.get_default_graph().get_tensor_by_name("doc_embeddings:0")
doc_embeddings_all_initial_tensor=tf.concat([doc_embeddings, doc_embeddings_test_initial_tensor],0,"concat")
#resize_var = tf.assign(doc_embeddings, doc_embeddings_all_data, validate_shape=False)
doc_embeddings_all=tf.Variable(doc_embeddings_all_initial_tensor,name = "doc_embeddings_all")
#sess.run(resize_var)
sess.run(tf.initialize_variables([doc_embeddings_all]))

print(doc_embeddings.shape)
print(doc_embeddings_all.shape)


x_inputs = tf.placeholder(tf.int32, shape=[None, 2]) #1 for word index and 1 for doc index
y_target = tf.placeholder(tf.int32, shape=[batch_size])
valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

embed= tf.nn.embedding_lookup(embeddings, x_inputs[:, 0])
    
doc_indices = tf.slice(x_inputs, [0,1],[batch_size,1])
doc_embed = tf.nn.embedding_lookup(doc_embeddings_all,doc_indices)
final_embed = tf.concat([embed, tf.squeeze(doc_embed)],1)

logits = tf.matmul(final_embed, tf.transpose(decoder_weights)) + decoder_biases


loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=y_target))
optimizer = tf.train.AdamOptimizer(learning_rate=model_learning_rate)
train_step = optimizer.minimize(loss)

#cosine similarity between words
norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
normalized_embeddings = embeddings / norm
valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True, name="cosine_similarity")

with tf.name_scope("performance"):
    loss_ph = tf.placeholder(tf.float32,shape=None,name='loss_summary')
    loss_summary = tf.summary.scalar('loss', loss_ph)
performance_summaries = tf.summary.merge([loss_summary])

saver = tf.train.Saver()
summ_writer = tf.summary.FileWriter(summaries_folder_name, sess.graph)

#sess.run(tf.initialize_variables([doc_embeddings]))
#sess.run(tf.variables_initializer([doc_embeddings_test]))
sess.run(tf.variables_initializer(optimizer.variables()))





print('Starting Training')


for i in range(generations):
    #batch_inputs, batch_labels = create_batch_data(text_data)
    batch_inputs, batch_labels = create_batch_data(text_data)
    feed_dict = {x_inputs : batch_inputs, y_target : batch_labels}

    #run the train step
    sess.run(train_step, feed_dict=feed_dict)

    #return the loss
    if (i+1) % print_loss_every == 0:
        loss_val = sess.run(loss, feed_dict=feed_dict)
        summ = sess.run(performance_summaries, feed_dict={loss_ph:loss_val})
        summ_writer.add_summary(summ, i+1)
        print('Loss at step {} : {}'.format(i+1, loss_val))
        
    #validation
    if (i+1) % print_valid_every == 0:
        sim = sess.run(similarity, feed_dict=feed_dict)
        for j in range(len(valid_words)):
            valid_word = word_dictionary_rev[valid_examples[j]]
            top_k = 5 # number of nearest neighbors
            nearest = (-sim[j, :]).argsort()[1:top_k+1]
            log_str = "Nearest to {}:".format(valid_word)
            for k in range(top_k):
                close_word = word_dictionary_rev[nearest[k]]
                log_str = '{} {},'.format(log_str, close_word)
            print(log_str)
            
    #save dictionary + embeddings
    if (i+1) % save_embeddings_every == 0:
        #save vocabulary dictionary
        with open(os.path.join(models_folder_name_test,'doc2vec_recipes_dict_words_integers.pkl'), 'wb') as f:
            pickle.dump(word_dictionary, f)
        
        #save embeddings
        model_checkpoint_path = os.path.join(os.getcwd(),models_folder_name_test,'doc2vec_recipes_checkpoint.ckpt')
        save_path = saver.save(sess, model_checkpoint_path)
        print('Model saved in file: {}'.format(save_path))
        
sess.close()

Creating Model
Instructions for updating:
Use `tf.variables_initializer` instead.
(36, 12)
(45, 12)
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Starting Training
Loss at step 50 : 5.502607822418213
Loss at step 100 : 2.147388458251953
Loss at step 150 : 2.3359336853027344
Loss at step 200 : 1.708918809890747
Loss at step 250 : 2.044132709503174
Loss at step 300 : 3.7006564140319824
Loss at step 350 : 2.879246950149536
Loss at step 400 : 3.726989269256592
Loss at step 450 : 2.8127894401550293
Loss at step 500 : 3.6285953521728516
Loss at step 550 : 3.098546266555786
Loss at step 600 : 2.663766622543335
Loss at step 650 : 3.913980484008789
Loss at step 700 : 2.1862072944641113
Loss at step 750 : 2.496659755706787
Loss at step 800 : 2.5425453186035156
Loss at step 850 : 3.627035140991211
Loss at step 900 : 2.4707632064819336
Loss at step 950 : 4.655030250549316
Loss at step 1000 : 2.4711506366729736
Loss at step 1050 : 2.6332643032073975
Loss at step 1100 : 3.

Loss at step 9150 : 3.1286540031433105
Loss at step 9200 : 2.792107343673706
Loss at step 9250 : 2.7290754318237305
Loss at step 9300 : 3.5409631729125977
Loss at step 9350 : 2.711690664291382
Loss at step 9400 : 3.72178316116333
Loss at step 9450 : 3.5924973487854004
Loss at step 9500 : 4.813631534576416
Loss at step 9550 : 1.7528550624847412
Loss at step 9600 : 3.021453857421875
Loss at step 9650 : 3.30926251411438
Loss at step 9700 : 3.2403383255004883
Loss at step 9750 : 2.8271923065185547
Loss at step 9800 : 1.8850440979003906
Loss at step 9850 : 3.1443228721618652
Loss at step 9900 : 3.5409021377563477
Loss at step 9950 : 2.0105667114257812
Loss at step 10000 : 3.3722589015960693
Nearest to tuna: garnish, position, piece, daikon, flesh,
Nearest to rice: zip, slice, topping, wasabi, surface,
Nearest to sushi: mat, finger, wasabi, coriander, bamboo,
Nearest to roll: mat, juice, cucumber, bag, sushi,
Nearest to sashimi: roe, position, tongs, tomato, sieve,
Nearest to steak: grill, g

Loss at step 18000 : 1.9715404510498047
Loss at step 18050 : 2.6111562252044678
Loss at step 18100 : 1.778967022895813
Loss at step 18150 : 3.7686691284179688
Loss at step 18200 : 2.7840542793273926
Loss at step 18250 : 2.9440689086914062
Loss at step 18300 : 2.1501200199127197
Loss at step 18350 : 3.1980373859405518
Loss at step 18400 : 3.367288589477539
Loss at step 18450 : 2.912303924560547
Loss at step 18500 : 3.363318920135498
Loss at step 18550 : 2.580749273300171
Loss at step 18600 : 2.5024280548095703
Loss at step 18650 : 2.884413957595825
Loss at step 18700 : 2.4008984565734863
Loss at step 18750 : 2.536184787750244
Loss at step 18800 : 3.2444605827331543
Loss at step 18850 : 2.5420377254486084
Loss at step 18900 : 4.180784225463867
Loss at step 18950 : 2.4705252647399902
Loss at step 19000 : 3.2385826110839844
Loss at step 19050 : 4.228095531463623
Loss at step 19100 : 2.8139572143554688
Loss at step 19150 : 2.9588494300842285
Loss at step 19200 : 1.9669842720031738
Loss at s

Loss at step 26850 : 2.301682472229004
Loss at step 26900 : 2.8680858612060547
Loss at step 26950 : 3.1893985271453857
Loss at step 27000 : 2.17691707611084
Loss at step 27050 : 3.6936159133911133
Loss at step 27100 : 2.9263193607330322
Loss at step 27150 : 2.5549378395080566
Loss at step 27200 : 1.8195457458496094
Loss at step 27250 : 1.6586408615112305
Loss at step 27300 : 3.7755331993103027
Loss at step 27350 : 3.0709948539733887
Loss at step 27400 : 2.8610918521881104
Loss at step 27450 : 2.6766371726989746
Loss at step 27500 : 3.0342559814453125
Loss at step 27550 : 3.045374870300293
Loss at step 27600 : 2.562063217163086
Loss at step 27650 : 2.9304113388061523
Loss at step 27700 : 2.5616507530212402
Loss at step 27750 : 2.356492042541504
Loss at step 27800 : 2.51688289642334
Loss at step 27850 : 2.7186150550842285
Loss at step 27900 : 3.293484687805176
Loss at step 27950 : 3.911208152770996
Loss at step 28000 : 3.196042776107788
Loss at step 28050 : 3.22221302986145
Loss at step 

Loss at step 35700 : 2.438579559326172
Loss at step 35750 : 2.421420097351074
Loss at step 35800 : 3.2613484859466553
Loss at step 35850 : 3.3110275268554688
Loss at step 35900 : 2.9529430866241455
Loss at step 35950 : 2.6336071491241455
Loss at step 36000 : 3.2945070266723633
Loss at step 36050 : 2.2466464042663574
Loss at step 36100 : 2.6318936347961426
Loss at step 36150 : 3.049426555633545
Loss at step 36200 : 2.858464241027832
Loss at step 36250 : 2.661043405532837
Loss at step 36300 : 3.218641996383667
Loss at step 36350 : 3.1992642879486084
Loss at step 36400 : 3.329301357269287
Loss at step 36450 : 2.1478848457336426
Loss at step 36500 : 3.4636125564575195
Loss at step 36550 : 3.0309183597564697
Loss at step 36600 : 3.473888397216797
Loss at step 36650 : 2.6337273120880127
Loss at step 36700 : 3.1306962966918945
Loss at step 36750 : 3.148876190185547
Loss at step 36800 : 2.3655030727386475
Loss at step 36850 : 3.3524184226989746
Loss at step 36900 : 3.024749755859375
Loss at st

Loss at step 45050 : 3.3738958835601807
Loss at step 45100 : 2.450235366821289
Loss at step 45150 : 3.446472644805908
Loss at step 45200 : 2.9914727210998535
Loss at step 45250 : 3.3413805961608887
Loss at step 45300 : 2.812196969985962
Loss at step 45350 : 3.0125749111175537
Loss at step 45400 : 2.9752249717712402
Loss at step 45450 : 2.3947222232818604
Loss at step 45500 : 2.4854252338409424
Loss at step 45550 : 2.793323516845703
Loss at step 45600 : 2.1731550693511963
Loss at step 45650 : 2.94169282913208
Loss at step 45700 : 3.6233620643615723
Loss at step 45750 : 2.4859931468963623
Loss at step 45800 : 3.307863235473633
Loss at step 45850 : 3.1695728302001953
Loss at step 45900 : 2.644683361053467
Loss at step 45950 : 3.737200975418091
Loss at step 46000 : 3.5368576049804688
Loss at step 46050 : 3.373859167098999
Loss at step 46100 : 2.3456287384033203
Loss at step 46150 : 2.6796514987945557
Loss at step 46200 : 2.8406851291656494
Loss at step 46250 : 2.452122211456299
Loss at ste

Loss at step 54700 : 3.202031135559082
Loss at step 54750 : 4.164126873016357
Loss at step 54800 : 3.4795963764190674
Loss at step 54850 : 2.8623814582824707
Loss at step 54900 : 3.8238561153411865
Loss at step 54950 : 3.419337034225464
Loss at step 55000 : 2.551496744155884
Nearest to tuna: garnish, position, piece, daikon, flesh,
Nearest to rice: zip, slice, topping, wasabi, surface,
Nearest to sushi: mat, finger, wasabi, coriander, bamboo,
Nearest to roll: mat, juice, cucumber, bag, sushi,
Nearest to sashimi: roe, position, tongs, tomato, sieve,
Nearest to steak: grill, grate, ground, butter, juice,
Nearest to grill: ground, steak, grate, marinade, meat,
Nearest to sauce: soy, salt, starch, vinegar, sieve,
Nearest to cream: cocoa, cheese, espresso, spread, liqueur,
Model saved in file: /notebooks/school/text_feature_extraction/models/test/3/doc2vec_recipes_checkpoint.ckpt
Loss at step 55050 : 2.8504486083984375
Loss at step 55100 : 3.6905455589294434
Loss at step 55150 : 2.945041656

Loss at step 63550 : 3.244258165359497
Loss at step 63600 : 2.2884278297424316
Loss at step 63650 : 2.8943309783935547
Loss at step 63700 : 3.5848231315612793
Loss at step 63750 : 2.8997607231140137
Loss at step 63800 : 3.1855032444000244
Loss at step 63850 : 2.297828435897827
Loss at step 63900 : 4.557212829589844
Loss at step 63950 : 2.8168063163757324
Loss at step 64000 : 2.3521242141723633
Loss at step 64050 : 3.701371192932129
Loss at step 64100 : 2.0859599113464355
Loss at step 64150 : 3.538677930831909
Loss at step 64200 : 1.8715221881866455
Loss at step 64250 : 4.7383646965026855
Loss at step 64300 : 2.622919797897339
Loss at step 64350 : 5.508232116699219
Loss at step 64400 : 2.524136781692505
Loss at step 64450 : 2.328735589981079
Loss at step 64500 : 2.9342641830444336
Loss at step 64550 : 3.3153867721557617
Loss at step 64600 : 2.8120884895324707
Loss at step 64650 : 3.5186920166015625
Loss at step 64700 : 2.928703784942627
Loss at step 64750 : 2.7572226524353027
Loss at st

Loss at step 72400 : 2.71095609664917
Loss at step 72450 : 3.1751198768615723
Loss at step 72500 : 2.7872519493103027
Loss at step 72550 : 2.8900904655456543
Loss at step 72600 : 2.557168960571289
Loss at step 72650 : 3.2985496520996094
Loss at step 72700 : 2.64426851272583
Loss at step 72750 : 1.8029030561447144
Loss at step 72800 : 2.3105335235595703
Loss at step 72850 : 2.825434684753418
Loss at step 72900 : 2.9554104804992676
Loss at step 72950 : 2.076582670211792
Loss at step 73000 : 3.2847490310668945
Loss at step 73050 : 2.0359392166137695
Loss at step 73100 : 3.921448230743408
Loss at step 73150 : 2.7492332458496094
Loss at step 73200 : 2.262420654296875
Loss at step 73250 : 1.617479920387268
Loss at step 73300 : 2.9298224449157715
Loss at step 73350 : 3.3387300968170166
Loss at step 73400 : 2.504014015197754
Loss at step 73450 : 2.8080108165740967
Loss at step 73500 : 2.647523880004883
Loss at step 73550 : 3.1113033294677734
Loss at step 73600 : 2.372704029083252
Loss at step 