In [1]:
import os
import pandas as pd
import numpy as np
import collections
import tensorflow as tf
import pickle

In [2]:
# Experiment iteration: selects the numbered sub-folder used by every path below.
number_of_iteration = 3

# Artefact folders, all rooted at the current working directory.
models_folder_name_test = os.path.join(os.getcwd(), 'models', 'test', str(number_of_iteration))
models_folder_name_train = os.path.join(os.getcwd(), 'models', 'train', str(number_of_iteration))
summaries_folder_name = os.path.join(os.getcwd(), 'summaries', 'test', str(number_of_iteration))

# Pickled, already pre-processed recipe texts for each split.
path_to_preprocessed_texts_test = os.path.join(models_folder_name_test, 'recipes_test_dataset.pkl')
path_to_preprocessed_texts_train = os.path.join(models_folder_name_train, 'recipes_train_dataset.pkl')

# NOTE(review): read_pickle executes pickle payloads; only load files produced by this project.
df_preprocessed_texts_test = pd.read_pickle(path_to_preprocessed_texts_test)
df_preprocessed_texts_train = pd.read_pickle(path_to_preprocessed_texts_train)
# Both splits merged and put back into index order.
df_preprocessed_texts_all = pd.concat(
    [df_preprocessed_texts_test, df_preprocessed_texts_train]
).sort_index()
# Word -> integer vocabulary saved next to the training model.
words_to_ints = pd.read_pickle(
    os.path.join(models_folder_name_train, 'doc2vec_recipes_dict_words_integers.pkl')
)

# Plain Python lists of the token lists, one entry per document.
preprocessed_texts_test = df_preprocessed_texts_test['preprocessed_texts'].values.tolist()
preprocessed_texts_train = df_preprocessed_texts_train['preprocessed_texts'].values.tolist()
preprocessed_texts_all = df_preprocessed_texts_all['preprocessed_texts'].values.tolist()
# Labels are taken from the training split only.
labels = df_preprocessed_texts_train.labels.values.tolist()

# Map every distinct label (alphabetical order) to a consecutive integer id.
unique_labels = sorted(set(labels))
number_categories = len(unique_labels)
categories_indices = np.arange(number_categories, dtype=int)
labels2integers = {label: index for label, index in zip(unique_labels, categories_indices)}

print(labels2integers)
df_preprocessed_texts_all

{'tiramisu': 3, 'sashimi': 0, 'sushi': 2, 'steak': 1}


Unnamed: 0,labels,number_of_important_words,preprocessed_texts,text_names
0,sashimi,124,"[salmon, avocado, onion, wasabi, seed, rice, n...",1.txt
1,sashimi,10,"[tuna, wasabi, soy, sauce, ginger, grain, posi...",10.txt
2,sashimi,56,"[sashimi, salmon, filet, sesame, seed, oil, se...",2.txt
3,sashimi,125,"[sushi, filet, block, sushi, filet, block, cor...",3.txt
4,sashimi,57,"[coriander, leaf, sesame, seed, oil, sushi, sh...",4.txt
5,sashimi,64,"[sushi, filet, grain, water, salt, sugar, oil,...",5.txt
6,sashimi,19,"[sushi, coriander, tuna, sashimi, piece, tuna,...",6.txt
7,sashimi,26,"[tuna, avocado, cut, slice, oil, lime, juice, ...",7.txt
8,sashimi,57,"[tuna, piece, piece, ginger, soy, sauce, sauce...",8.txt
9,sashimi,42,"[sushi, rice, rice, vinegar, vinegar, sugar, s...",9.txt


In [3]:
# --- optimisation ---------------------------------------------------------
batch_size = 8                 # rows per training batch
generations = 75000            # number of training steps
model_learning_rate = 5e-4     # learning rate passed to the optimizer

# --- embedding widths -----------------------------------------------------
embedding_size = 24            # word embedding size
doc_embedding_size = 12        # document embedding size
# Width of a word vector and a document vector placed side by side.
concatenated_size = doc_embedding_size + embedding_size

# --- reporting cadence, in training steps ---------------------------------
print_loss_every = 50
print_valid_every = 5000
save_embeddings_every = 5000

In [4]:
#replace each word in texts with integer value
def text_to_numbers(preprocessed_texts, word_dict):
    """Encode each text as the list of integer ids of its known words.

    Args:
        preprocessed_texts: iterable of texts, each an iterable of words.
        word_dict: mapping from word to integer id.

    Returns:
        A list with one list of ids per input text, in the original order.
        Words that are not keys of ``word_dict`` are dropped (not replaced by
        a placeholder id), so an encoded text may be shorter than its source
        or even empty.
    """
    return [
        [word_dict[token] for token in tokens if token in word_dict]
        for tokens in preprocessed_texts
    ]


def create_batch_data(text_with_words_conv_to_numbers, batch_size=batch_size):
    """Draw one random batch from a single, randomly chosen document.

    One document is picked at random and one of its distinct words is picked
    as the input word. Every row of the batch pairs that same input word with
    the document's index; only the targets differ, each being a word drawn at
    random from the document (repeats allowed, frequent words more likely).

    Args:
        text_with_words_conv_to_numbers: sequence of documents, each a
            sequence of word ids (raw words also work). Documents without any
            word are never selected.
        batch_size: number of rows to generate.

    Returns:
        Tuple ``(batch_data, label_data)``. ``batch_data`` is an array with
        ``batch_size`` rows of ``[input_word, document_index]``;
        ``label_data`` is a 1-D array of ``batch_size`` target words.

    Raises:
        ValueError: if there is no non-empty document to sample from.
    """
    # An empty document (e.g. one whose words were all out of vocabulary)
    # would make np.random.choice fail, so only non-empty ones are candidates.
    candidate_ixs = [
        ix for ix, text in enumerate(text_with_words_conv_to_numbers) if len(text) > 0
    ]
    if not candidate_ixs:
        raise ValueError('create_batch_data needs at least one non-empty text')

    # Scalar draw: avoids int() on a 1-element array, which newer NumPy deprecates.
    rand_text_ix = int(np.random.choice(candidate_ixs))
    rand_text = text_with_words_conv_to_numbers[rand_text_ix]

    # Input word: uniform over the distinct words of the chosen document.
    word_to_predict_label = np.random.choice(list(set(rand_text)))

    # The (input word, document index) pair is identical for every row.
    batch_data = np.array([[word_to_predict_label, rand_text_ix]] * batch_size)
    # Targets: independent draws over all word occurrences in the document.
    label_data = np.random.choice(rand_text, size=batch_size)

    return (batch_data, label_data)

In [5]:
# Vocabulary: word -> integer id.
word_dictionary = words_to_ints
vocabulary_size = len(word_dictionary)
print(word_dictionary)
print(vocabulary_size)

# Inverse lookup: integer id -> word.
word_dictionary_rev = {index: word for word, index in word_dictionary.items()}

{'air': 0, 'crab': 30, 'roll': 92, 'preheat': 85, 'cake': 17, 'tuna': 133, 'yolk': 144, 'part': 77, 'soy': 107, 'finger': 43, 'guacamole': 53, 'filet': 41, 'egg': 39, 'cream': 31, 'mayonnaise': 65, 'berry': 9, 'truffle': 132, 'bamboo': 6, 'roe': 91, 'pinch': 81, 'liqueur': 60, 'chopstick': 23, 'drain': 38, 'ball': 5, 'vanilla': 134, 'seed': 100, 'mushroom': 71, 'chive': 21, 'salmon': 94, 'blade': 11, 'sauce': 97, 'brush': 15, 'speed': 109, 'breast': 14, 'marinade': 61, 'grate': 50, 'whip': 140, 'strip': 119, 'tongs': 128, 'coffee': 26, 'slice': 106, 'leg': 58, 'sushi': 122, 'onion': 75, 'strawberry': 118, 'sugar': 120, 'worcestershire': 143, 'piece': 80, 'steak': 115, 'saucepan': 98, 'surface': 121, 'cling': 24, 'cut': 34, 'espresso': 40, 'wafer': 137, 'salt': 95, 'asparagus': 2, 'topping': 129, 'grain': 49, 'carrot': 18, 'lime': 59, 'water': 139, 'raspberry': 89, 'bottom': 13, 'wasabi': 138, 'row': 93, 'medium': 67, 'cutting': 35, 'garnish': 47, 'pour': 83, 'press': 86, 'sieve': 103, 

In [6]:
# Encode both splits with the shared vocabulary (unknown words are dropped).
text_data_test = text_to_numbers(preprocessed_texts_test, word_dictionary)
text_data_train = text_to_numbers(preprocessed_texts_train, word_dictionary)

# Report the number of documents with len(): the encoded texts have different
# lengths, and np.shape() on such ragged nested lists raises ValueError on
# NumPy >= 1.24. The 1-tuple keeps the printed form, e.g. "(9,)".
print((len(text_data_test),))
print((len(text_data_train),))

# Training documents first, test documents after them: this is the row order
# of doc_embeddings_all, which concatenates the restored training rows with
# the new test rows, so a document's position here is its embedding row.
text_data = text_data_train + text_data_test
print((len(text_data),))
print(text_data)

#print(text_data_test)
#print(text_data_train)

(9,)
(36,)
(45,)
[[94, 3, 75, 138, 100, 90, 72, 80, 94, 95, 94, 139, 44, 53, 75, 127, 95, 95, 95, 95, 95, 95, 95, 95, 34, 3, 3, 3, 3, 80, 75, 70, 3, 70, 53, 92, 59, 54, 59, 59, 54, 59, 54, 3, 54, 59, 53, 70, 138, 138, 10, 138, 53, 10, 10, 101, 100, 107, 97, 127, 127, 127, 3, 29, 57, 57, 70, 29, 53, 24, 94, 95, 94, 94, 44, 96, 34, 106, 24, 10, 114, 24, 106, 94, 114, 24, 114, 94, 94, 114, 94, 94, 94, 46, 106, 46, 94, 74, 94, 94, 116, 46, 94, 94, 94, 94, 94, 94, 46, 74, 94, 74, 94, 47, 94, 90, 72, 72, 94, 53, 29, 90, 72, 53], [133, 138, 107, 97, 48, 49, 82, 43, 49, 106], [122, 41, 12, 122, 41, 12, 29, 57, 101, 100, 74, 133, 133, 41, 106, 106, 41, 34, 133, 106, 12, 12, 133, 100, 74, 43, 74, 133, 29, 41, 121, 41, 12, 11, 44, 133, 12, 44, 44, 82, 41, 12, 1, 34, 1, 11, 106, 44, 106, 133, 12, 11, 34, 106, 47, 47, 43, 18, 5, 82, 5, 106, 122, 18, 5, 11, 82, 106, 80, 106, 106, 94, 43, 43, 122, 5, 82, 5, 32, 18, 5, 133, 106, 122, 106, 1, 82, 1, 106, 47, 106, 43, 106, 133, 106, 106, 47, 106, 57, 32

In [7]:
# Probe words whose nearest neighbours are printed during validation.
valid_words = [
    'tuna', 'rice', 'sushi', 'roll', 'sashimi',
    'steak', 'grill', 'sauce', 'cream',
]

# Integer ids of the probe words; words absent from the vocabulary are left out,
# so this list can be shorter than valid_words.
valid_examples = [word_dictionary[word] for word in valid_words if word in word_dictionary]
print(valid_examples)

[133, 90, 122, 92, 96, 115, 51, 97, 31]


In [8]:
# Smoke-test the batch generator. NOTE: the raw word lists (not the integer-encoded
# texts) are passed here, so the printed batch contains words and the document
# index is shown as a string.
batch_data, label_data = create_batch_data(preprocessed_texts_all)
print(batch_data)
print(label_data)
print(label_data.shape)

[['mascarpone' '35']
 ['mascarpone' '35']
 ['mascarpone' '35']
 ['mascarpone' '35']
 ['mascarpone' '35']
 ['mascarpone' '35']
 ['mascarpone' '35']
 ['mascarpone' '35']]
['medium' 'powder' 'yolk' 'ladyfinger' 'sugar' 'yolk' 'ladyfinger'
 'mixture']
(8,)


In [9]:
# Start from an empty default graph so re-running this cell does not clash
# with variables created by an earlier run.
tf.reset_default_graph()
# Declare the variables stored in the training checkpoint (restored in a later
# cell); names and shapes must match what was saved.
# Word embeddings: one row per vocabulary word; trainable=False keeps them out
# of the trainable-variables collection, so the optimizer leaves them as-is.
embeddings = tf.get_variable("embeddings", shape=[vocabulary_size, embedding_size], trainable=False)
# Document embeddings: one row per TRAINING document.
doc_embeddings = tf.get_variable("doc_embeddings", shape=[len(preprocessed_texts_train), doc_embedding_size])
# Output layer applied to the concatenated (word + document) vector; also frozen.
decoder_weights = tf.get_variable("decoder_weights", shape=[vocabulary_size, concatenated_size], trainable=False)
decoder_biases = tf.get_variable("decoder_biases", shape=[vocabulary_size], trainable=False)
print(embeddings.trainable)
# Created now, while only the four variables above exist, so this saver covers
# exactly those and does not look for later variables in the checkpoint.
restorer = tf.train.Saver(name="restoring")


False


In [10]:
# InteractiveSession installs itself as the default session, which is what
# lets the later `.eval()` calls run without an explicit session argument.
sess = tf.InteractiveSession()

In [11]:
# Load the trained values from the checkpoint written by the training run.
restorer.restore(
    sess,
    os.path.join(models_folder_name_train, "doc2vec_recipes_checkpoint.ckpt"),
)
print("Model restored.")

# Check the values of the variables
print("embeddings : %s" % sess.run(embeddings))
print("doc_embeddings : %s" % sess.run(doc_embeddings))
print("decoder_weights : %s" % sess.run(decoder_weights))
print("decoder_biases : %s" % sess.run(decoder_biases))

INFO:tensorflow:Restoring parameters from /notebooks/school/text_feature_extraction/models/train/2/doc2vec_recipes_checkpoint.ckpt
Model restored.
embeddings : [[ 0.16318983  0.0157128   1.1473163  ... -0.36404973 -0.47158304
  -0.9127127 ]
 [-0.3714574   0.68322307 -0.08932335 ...  0.77678955 -0.42184588
  -0.627995  ]
 [ 0.36375847  0.5762724   0.46977198 ... -0.35758272  0.12533186
  -0.36692593]
 ...
 [ 0.9877567  -0.24879673  0.5887008  ...  0.09593772 -0.69610685
   0.7749497 ]
 [-0.01066543 -0.54074144  0.9609981  ... -0.19811237 -0.07805609
   0.06442046]
 [-0.16597985 -0.22206676  0.48183683 ... -0.00731345  0.35793105
  -0.35809818]]
doc_embeddings : [[-0.0643751   0.3981141  -1.2441322   1.4530942  -3.1478906   0.5494276
  -1.0748886   0.18218173 -0.04573985 -0.7446831  -1.5485166   2.43101   ]
 [-2.0126398   1.6969712  -1.9203137  -3.0515747   1.0123584  -1.4938012
   0.40120342  4.2204633  -0.4455142  -1.768436   -2.3949094  -0.20840812]
 [-2.4571276   0.10651758 -0.856975

In [12]:
#variables_names = [v.name for v in tf.trainable_variables()]
#values = sess.run(variables_names)
#for k, v in zip(variables_names, values):
#    print("Variable: ", k)
#    print("Shape: ", v.shape)
#    print(v)

In [13]:
print('Creating Model')

# Document-embedding table for ALL documents: the restored rows of the training
# documents come first, followed by randomly initialised rows for the test
# documents. Row order therefore has to match text_data (train, then test).
doc_embeddings_test_initial_tensor = tf.random_uniform([len(preprocessed_texts_test), doc_embedding_size], -1.0, 1.0)
doc_embeddings_all_initial_tensor = tf.concat([doc_embeddings, doc_embeddings_test_initial_tensor], 0, "concat")
doc_embeddings_all = tf.Variable(doc_embeddings_all_initial_tensor, name="doc_embeddings_all")
# tf.initialize_variables is deprecated; tf.variables_initializer replaces it.
# Only the new table is initialised so the restored variables keep their values.
sess.run(tf.variables_initializer([doc_embeddings_all]))

print(doc_embeddings.shape)
print(doc_embeddings_all.shape)


x_inputs = tf.placeholder(tf.int32, shape=[None, 2]) #1 for word index and 1 for doc index
y_target = tf.placeholder(tf.int32, shape=[batch_size])
valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

# Column 0 of the input holds the word id, column 1 the document index.
embed = tf.nn.embedding_lookup(embeddings, x_inputs[:, 0])

doc_indices = tf.slice(x_inputs, [0, 1], [batch_size, 1])
doc_embed = tf.nn.embedding_lookup(doc_embeddings_all, doc_indices)
# Squeeze only the singleton axis left by the slice; an unrestricted squeeze
# would also drop the batch axis when batch_size is 1.
final_embed = tf.concat([embed, tf.squeeze(doc_embed, axis=[1])], 1)

logits = tf.matmul(final_embed, tf.transpose(decoder_weights)) + decoder_biases


loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_target))
optimizer = tf.train.AdamOptimizer(learning_rate=model_learning_rate)
# Word embeddings and decoder are non-trainable and doc_embeddings only feeds
# the initial value above, so the loss gradient reaches doc_embeddings_all only.
train_step = optimizer.minimize(loss)

#cosine similarity between words
# keep_dims is deprecated in favour of keepdims.
norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True))
normalized_embeddings = embeddings / norm
valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True, name="cosine_similarity")

# The loss is logged through a placeholder so an already computed value can be
# written to the summary without re-evaluating the model.
with tf.name_scope("performance"):
    loss_ph = tf.placeholder(tf.float32, shape=None, name='loss_summary')
    loss_summary = tf.summary.scalar('loss', loss_ph)
performance_summaries = tf.summary.merge([loss_summary])

# Built after every variable (including the optimizer's) has been created.
saver = tf.train.Saver()
summ_writer = tf.summary.FileWriter(summaries_folder_name, sess.graph)

# Initialise only the optimizer's own variables; everything else already holds
# restored or freshly initialised values.
sess.run(tf.variables_initializer(optimizer.variables()))





print('Starting Training')

top_k = 5 # number of nearest neighbors

for i in range(generations):
    # One batch = one random document, one input word, batch_size target words.
    batch_inputs, batch_labels = create_batch_data(text_data)
    feed_dict = {x_inputs : batch_inputs, y_target : batch_labels}

    #run the train step
    sess.run(train_step, feed_dict=feed_dict)

    #return the loss
    if (i+1) % print_loss_every == 0:
        # Loss of the batch just trained on, measured after the update.
        loss_val = sess.run(loss, feed_dict=feed_dict)
        summ = sess.run(performance_summaries, feed_dict={loss_ph:loss_val})
        summ_writer.add_summary(summ, i+1)
        print('Loss at step {} : {}'.format(i+1, loss_val))

    #validation
    if (i+1) % print_valid_every == 0:
        # The similarity matrix depends only on the embeddings, so no feed is needed.
        sim = sess.run(similarity)
        # Iterate over valid_examples (one similarity row each), not valid_words:
        # probe words missing from the vocabulary have no row, and indexing by
        # valid_words would then misalign or run past the end.
        for j, valid_example in enumerate(valid_examples):
            valid_word = word_dictionary_rev[valid_example]
            # Position 0 is skipped: it is expected to be the probe word itself.
            nearest = (-sim[j, :]).argsort()[1:top_k+1]
            log_str = "Nearest to {}:".format(valid_word)
            for neighbour_ix in nearest:
                log_str = '{} {},'.format(log_str, word_dictionary_rev[neighbour_ix])
            print(log_str)

    #save dictionary + embeddings
    if (i+1) % save_embeddings_every == 0:
        #save vocabulary dictionary
        with open(os.path.join(models_folder_name_test,'doc2vec_recipes_dict_words_integers.pkl'), 'wb') as f:
            pickle.dump(word_dictionary, f)

        #save embeddings
        # models_folder_name_test is already rooted at the working directory,
        # so no extra os.getcwd() prefix is needed.
        model_checkpoint_path = os.path.join(models_folder_name_test,'doc2vec_recipes_checkpoint.ckpt')
        save_path = saver.save(sess, model_checkpoint_path)
        print('Model saved in file: {}'.format(save_path))

# Flush and release the summary file before shutting the session down.
summ_writer.close()
sess.close()

Creating Model
Instructions for updating:
Use `tf.variables_initializer` instead.
(36, 12)
(45, 12)
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Starting Training
Loss at step 50 : 3.058952569961548
Loss at step 100 : 2.8579609394073486
Loss at step 150 : 2.6407299041748047
Loss at step 200 : 2.4341206550598145
Loss at step 250 : 2.806662082672119
Loss at step 300 : 3.3923468589782715
Loss at step 350 : 2.295071840286255
Loss at step 400 : 3.9047164916992188
Loss at step 450 : 2.7947142124176025
Loss at step 500 : 3.8241539001464844
Loss at step 550 : 2.741347551345825
Loss at step 600 : 2.588061571121216
Loss at step 650 : 2.8121068477630615
Loss at step 700 : 3.637629508972168
Loss at step 750 : 2.487825632095337
Loss at step 800 : 2.6784987449645996
Loss at step 850 : 2.650233745574951
Loss at step 900 : 2.7082042694091797
Loss at step 950 : 2.3866844177246094
Loss at step 1000 : 2.2769904136657715
Loss at step 1050 : 4.945453643798828
Loss at step 1100 :

Loss at step 9150 : 2.930691719055176
Loss at step 9200 : 4.0857110023498535
Loss at step 9250 : 2.8266191482543945
Loss at step 9300 : 3.566781520843506
Loss at step 9350 : 2.4716339111328125
Loss at step 9400 : 5.131914138793945
Loss at step 9450 : 2.446767807006836
Loss at step 9500 : 2.481318950653076
Loss at step 9550 : 2.385599136352539
Loss at step 9600 : 2.7752575874328613
Loss at step 9650 : 2.328319549560547
Loss at step 9700 : 3.378056526184082
Loss at step 9750 : 2.5716638565063477
Loss at step 9800 : 2.9947280883789062
Loss at step 9850 : 3.794292449951172
Loss at step 9900 : 2.9429702758789062
Loss at step 9950 : 3.3360559940338135
Loss at step 10000 : 2.7206146717071533
Nearest to tuna: grain, leaf, brush, juice, seed,
Nearest to rice: roe, surface, filling, blade, brush,
Nearest to sushi: piece, space, carrot, sashimi, zip,
Nearest to roll: carrot, crab, coriander, paper, teriyaki,
Nearest to sashimi: sesame, starch, fish, leaf, nori,
Nearest to steak: preheat, ground, 

Loss at step 18000 : 2.313159465789795
Loss at step 18050 : 2.3962340354919434
Loss at step 18100 : 2.726264238357544
Loss at step 18150 : 2.339568853378296
Loss at step 18200 : 2.4435911178588867
Loss at step 18250 : 2.3175196647644043
Loss at step 18300 : 4.184787273406982
Loss at step 18350 : 3.184863567352295
Loss at step 18400 : 3.1035966873168945
Loss at step 18450 : 2.820713520050049
Loss at step 18500 : 5.439957141876221
Loss at step 18550 : 2.454987049102783
Loss at step 18600 : 1.398200511932373
Loss at step 18650 : 2.779201030731201
Loss at step 18700 : 2.3195924758911133
Loss at step 18750 : 2.950563430786133
Loss at step 18800 : 2.170604944229126
Loss at step 18850 : 3.3816018104553223
Loss at step 18900 : 3.273651361465454
Loss at step 18950 : 2.157461643218994
Loss at step 19000 : 3.2127790451049805
Loss at step 19050 : 2.544257879257202
Loss at step 19100 : 1.8404470682144165
Loss at step 19150 : 3.6875367164611816
Loss at step 19200 : 3.84039568901062
Loss at step 1925

Loss at step 26900 : 3.5088276863098145
Loss at step 26950 : 2.992300510406494
Loss at step 27000 : 3.714038610458374
Loss at step 27050 : 2.6908795833587646
Loss at step 27100 : 2.6797518730163574
Loss at step 27150 : 1.2789390087127686
Loss at step 27200 : 3.0313377380371094
Loss at step 27250 : 2.2482502460479736
Loss at step 27300 : 3.199490547180176
Loss at step 27350 : 2.2635364532470703
Loss at step 27400 : 2.2613110542297363
Loss at step 27450 : 2.3949344158172607
Loss at step 27500 : 3.022629499435425
Loss at step 27550 : 2.201231002807617
Loss at step 27600 : 2.6531901359558105
Loss at step 27650 : 3.2377724647521973
Loss at step 27700 : 3.2486677169799805
Loss at step 27750 : 4.949773788452148
Loss at step 27800 : 3.434231758117676
Loss at step 27850 : 3.0469167232513428
Loss at step 27900 : 3.6374711990356445
Loss at step 27950 : 2.801950454711914
Loss at step 28000 : 2.633666515350342
Loss at step 28050 : 2.4437761306762695
Loss at step 28100 : 2.562628746032715
Loss at st

Loss at step 35700 : 2.9197592735290527
Loss at step 35750 : 2.7628705501556396
Loss at step 35800 : 2.196964740753174
Loss at step 35850 : 3.074491262435913
Loss at step 35900 : 2.915529489517212
Loss at step 35950 : 3.5876684188842773
Loss at step 36000 : 3.3381896018981934
Loss at step 36050 : 2.9049363136291504
Loss at step 36100 : 4.298178672790527
Loss at step 36150 : 4.756310939788818
Loss at step 36200 : 4.365467071533203
Loss at step 36250 : 2.73508620262146
Loss at step 36300 : 3.120785713195801
Loss at step 36350 : 3.5026278495788574
Loss at step 36400 : 3.356668472290039
Loss at step 36450 : 5.014613628387451
Loss at step 36500 : 3.869354248046875
Loss at step 36550 : 3.190053939819336
Loss at step 36600 : 3.0029754638671875
Loss at step 36650 : 2.8165903091430664
Loss at step 36700 : 2.2005350589752197
Loss at step 36750 : 2.4575252532958984
Loss at step 36800 : 3.2301025390625
Loss at step 36850 : 2.4443411827087402
Loss at step 36900 : 2.6079750061035156
Loss at step 369

Loss at step 45050 : 2.7766833305358887
Loss at step 45100 : 2.51812744140625
Loss at step 45150 : 2.907332181930542
Loss at step 45200 : 2.5258026123046875
Loss at step 45250 : 3.744718313217163
Loss at step 45300 : 2.8791375160217285
Loss at step 45350 : 2.885841131210327
Loss at step 45400 : 3.114434242248535
Loss at step 45450 : 3.214751958847046
Loss at step 45500 : 3.423844575881958
Loss at step 45550 : 3.0075254440307617
Loss at step 45600 : 3.203125
Loss at step 45650 : 2.7756049633026123
Loss at step 45700 : 3.190863847732544
Loss at step 45750 : 2.39113187789917
Loss at step 45800 : 2.731549024581909
Loss at step 45850 : 2.5115129947662354
Loss at step 45900 : 3.3870108127593994
Loss at step 45950 : 2.389962673187256
Loss at step 46000 : 2.1191203594207764
Loss at step 46050 : 2.771040678024292
Loss at step 46100 : 2.5731544494628906
Loss at step 46150 : 2.567490577697754
Loss at step 46200 : 3.028702974319458
Loss at step 46250 : 2.7533888816833496
Loss at step 46300 : 2.336

Loss at step 54650 : 2.454263687133789
Loss at step 54700 : 3.0572915077209473
Loss at step 54750 : 4.671723365783691
Loss at step 54800 : 2.222893714904785
Loss at step 54850 : 4.06284761428833
Loss at step 54900 : 3.5888454914093018
Loss at step 54950 : 2.460597515106201
Loss at step 55000 : 2.5278046131134033
Nearest to tuna: grain, leaf, brush, juice, seed,
Nearest to rice: roe, surface, filling, blade, brush,
Nearest to sushi: piece, space, carrot, sashimi, zip,
Nearest to roll: carrot, crab, coriander, paper, teriyaki,
Nearest to sashimi: sesame, starch, fish, leaf, nori,
Nearest to steak: preheat, ground, grill, salt, vinegar,
Nearest to grill: preheat, grate, marinade, ground, worcestershire,
Nearest to sauce: onion, sesame, truffle, salt, cone,
Nearest to cream: coffee, cheese, confectioner, bottom, liqueur,
Model saved in file: /notebooks/school/text_feature_extraction/models/test/2/doc2vec_recipes_checkpoint.ckpt
Loss at step 55050 : 2.5386617183685303
Loss at step 55100 : 2

Loss at step 63500 : 2.4254953861236572
Loss at step 63550 : 3.180607557296753
Loss at step 63600 : 2.5792343616485596
Loss at step 63650 : 3.5115184783935547
Loss at step 63700 : 2.5311546325683594
Loss at step 63750 : 3.9828438758850098
Loss at step 63800 : 1.9277875423431396
Loss at step 63850 : 3.7658839225769043
Loss at step 63900 : 3.1013307571411133
Loss at step 63950 : 2.1662216186523438
Loss at step 64000 : 2.330043077468872
Loss at step 64050 : 3.168379783630371
Loss at step 64100 : 4.915131092071533
Loss at step 64150 : 3.3536505699157715
Loss at step 64200 : 3.025909423828125
Loss at step 64250 : 2.8458433151245117
Loss at step 64300 : 3.515390157699585
Loss at step 64350 : 2.8466413021087646
Loss at step 64400 : 2.193826675415039
Loss at step 64450 : 2.6212551593780518
Loss at step 64500 : 1.9885929822921753
Loss at step 64550 : 3.225348949432373
Loss at step 64600 : 3.1525657176971436
Loss at step 64650 : 3.386655569076538
Loss at step 64700 : 2.2179551124572754
Loss at s

Loss at step 72300 : 3.949219226837158
Loss at step 72350 : 2.194791316986084
Loss at step 72400 : 2.4383316040039062
Loss at step 72450 : 3.191035747528076
Loss at step 72500 : 3.2878313064575195
Loss at step 72550 : 2.8235697746276855
Loss at step 72600 : 2.4027280807495117
Loss at step 72650 : 2.1969780921936035
Loss at step 72700 : 2.84018611907959
Loss at step 72750 : 2.2164368629455566
Loss at step 72800 : 2.584534168243408
Loss at step 72850 : 3.7449822425842285
Loss at step 72900 : 2.8225159645080566
Loss at step 72950 : 2.7600834369659424
Loss at step 73000 : 2.699350595474243
Loss at step 73050 : 3.231889009475708
Loss at step 73100 : 2.7057671546936035
Loss at step 73150 : 2.0836024284362793
Loss at step 73200 : 2.9320526123046875
Loss at step 73250 : 2.158360004425049
Loss at step 73300 : 2.52298641204834
Loss at step 73350 : 2.671820640563965
Loss at step 73400 : 3.109055519104004
Loss at step 73450 : 4.115716934204102
Loss at step 73500 : 3.259422540664673
Loss at step 73