In [1]:
import tensorflow as tf
import tqdm
import datetime

from data_processing import get_preprocessed_data
from model import SkipGram, get_Model

# loading data
file_name = "bible.txt"
file_path = f"data/{file_name}"

config_name = "Voc-10000_Epoch-15"

VOCABULARY = 10000 #10000
WINDOW = 1 # because window is i - 2 and i + 2 then = 5
TRAIN_PART = 0.8 # partition of the data to be training data
EMBEDDING = 32 # size of the embedding
BATCH = 64 # batch_size
EPOCHS = 15
K = 5
NEGATIVE_SAMPLES = 1

words_keep_track = ["holy", "father", "wine", "poison", "love", "strong", "day"]

(train_ds, test_ds), tokenizer = get_preprocessed_data(file_path,VOCABULARY,WINDOW,TRAIN_PART,BATCH)
words_sequence = tf.reshape(tf.cast(tokenizer.texts_to_sequences([words_keep_track]),tf.int64),[-1])
cosine_similarity = tf.keras.losses.CosineSimilarity(axis=1)

model = SkipGram(VOCABULARY,EMBEDDING,NEGATIVE_SAMPLES)
#model, skipgram = 
model.compile(optimizer='adam')

# build model
test_tensor = tf.zeros(shape=(64,2,1))
model(test_tensor)

#training loop 

time_string = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# custom instead of time_string
config = config_name

train_file_path = f"logs/{config_name}/{time_string}/train"
test_file_path = f"logs/{config_name}/{time_string}/test"
train_summary_writer = tf.summary.create_file_writer(train_file_path)
test_summary_writer = tf.summary.create_file_writer(test_file_path)

for e in range (EPOCHS):

    for s in tqdm.tqdm(train_ds,position=0,leave=True):
        metrics = model.train_step(tf.expand_dims(s,axis=-1))
        break # TODO

    # log in tensorboard and print
    with train_summary_writer.as_default():
        [tf.summary.scalar(name = m.name, data = m.result(),step=e) for m in model.metrics]

    [ tf.print(f"Epoch {e} {k}: {v.numpy()} ") for (k,v) in metrics.items() ]

    model.reset_metrics()

    print("\nEpoch: ", e)
    print("Evaluation k-nearest neighbours using cosine similarity")

    # calculate whole embedding 
    whole_embedding = [tf.nn.embedding_lookup(model.embedding, tf.expand_dims(i,axis=-1)) for i in tf.range(VOCABULARY,dtype=tf.int64)]

    # calculate embedding of words
    track_words_embedding = [tf.nn.embedding_lookup(model.embedding, tf.expand_dims(w,axis=-1)) for w in words_sequence]
    for j,tw in enumerate (tqdm.tqdm(track_words_embedding)):
        # calculate cosine similarities between whole and words 
        cosines = [(cosine_similarity(tw,we),i) for i,we in enumerate(whole_embedding)]

        # sort by distance and return k-nearest
        sorted_cosines = sorted(cosines, reverse=True)
        # sequence to text of nearest neighbours
        words_neighbours = tf.reshape(tokenizer.sequences_to_texts([tf.reshape(sorted_cosines[:K],(K,-1))[:,1].numpy()]),[-1])
        words_neighbours = str(words_neighbours.numpy().item())[2:-1]
        # print word with its k-nearest (maybe with cosine similarities)
        print(words_keep_track[j], ": ")
        for i, word in enumerate(words_neighbours.split(" ")):
            print(word, ": ", sorted_cosines[i].__str__().split(",")[2][7:-1])
        print()






2023-02-06 14:16:06.356184: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Initilaize creating Tokens: 1.389 sec
Initialize Paring: 1.5374 sec
Pairs length:  782782
Create Datasets and initialize Preprocess: 

2023-02-06 14:16:13.520148: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-06 14:16:13.521766: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


3.205 sec


  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:16:14.783100: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 0 loss: 2.662961483001709 






Epoch:  0
Evaluation k-nearest neighbours using cosine similarity


 14%|████████████▏                                                                        | 1/7 [00:13<01:22, 13.74s/it]

holy : 
dash :  0.5958506
determined :  0.56684345
therein :  0.5628224
curseth :  0.5518258
such :  0.5274934



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:09, 13.88s/it]

father : 
enquired :  0.58724624
arise :  0.5732074
inform :  0.56128293
ararat :  0.55694956
amzi :  0.55148864



 43%|████████████████████████████████████▍                                                | 3/7 [00:42<00:56, 14.11s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:55<00:42, 14.01s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:10<00:28, 14.14s/it]

love : 
demetrius :  0.5931341
haggi :  0.58357596
point :  0.57561445
plow :  0.55814046
anethothite :  0.5540074



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:24<00:14, 14.23s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:38<00:00, 14.09s/it]


day : 
seeing :  0.60380214
teats :  0.6014524
figure :  0.5779697
sheaves :  0.5633934
propitiation :  0.5617509



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:17:56.622846: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 1 loss: 10.32557201385498 

Epoch:  1
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:13<01:23, 13.88s/it]

holy : 
dash :  0.5958506
determined :  0.56684345
therein :  0.5628224
curseth :  0.5518258
such :  0.5274934



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:10, 14.02s/it]

father : 
enquired :  0.58724624
arise :  0.5732074
inform :  0.56128293
ararat :  0.55694956
amzi :  0.55148864



 43%|████████████████████████████████████▍                                                | 3/7 [00:42<00:56, 14.05s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:56<00:42, 14.23s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:10<00:28, 14.27s/it]

love : 
demetrius :  0.5931341
haggi :  0.58357596
point :  0.57561445
plow :  0.55814046
anethothite :  0.5540074



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:24<00:14, 14.15s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:38<00:00, 14.13s/it]


day : 
seeing :  0.6011829
teats :  0.59855556
figure :  0.58090943
sheaves :  0.5634252
propitiation :  0.5618253



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:19:38.869311: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 2 loss: 5.579722881317139 

Epoch:  2
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:14<01:25, 14.22s/it]

holy : 
dash :  0.5958506
determined :  0.56684345
therein :  0.5628224
curseth :  0.5518258
such :  0.5274934



 29%|████████████████████████▎                                                            | 2/7 [00:28<01:11, 14.22s/it]

father : 
enquired :  0.58724624
arise :  0.5732074
inform :  0.56128293
ararat :  0.55694956
amzi :  0.55148864



 43%|████████████████████████████████████▍                                                | 3/7 [00:42<00:56, 14.05s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:56<00:41, 13.98s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:10<00:28, 14.08s/it]

love : 
demetrius :  0.5931341
haggi :  0.58357596
point :  0.57561445
plow :  0.55814046
anethothite :  0.5540074



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:24<00:14, 14.08s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:38<00:00, 14.08s/it]


day : 
seeing :  0.598958
teats :  0.5961168
figure :  0.5829926
sheaves :  0.56326395
propitiation :  0.56169766



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:21:20.751343: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 3 loss: 11.004671096801758 

Epoch:  3
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:13<01:23, 13.98s/it]

holy : 
dash :  0.5958506
determined :  0.56684345
therein :  0.5628224
curseth :  0.5518258
such :  0.5274934



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:10, 14.00s/it]

father : 
enquired :  0.58724624
arise :  0.5732074
inform :  0.56128293
ararat :  0.55694956
amzi :  0.55148864



 43%|████████████████████████████████████▍                                                | 3/7 [00:41<00:55, 13.97s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:55<00:41, 13.95s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:09<00:27, 13.91s/it]

love : 
demetrius :  0.5920911
haggi :  0.5860636
point :  0.5720621
plow :  0.55523217
anethothite :  0.5534669



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:23<00:13, 13.90s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:37<00:00, 13.94s/it]


day : 
seeing :  0.5970069
teats :  0.5939909
figure :  0.58457446
sheaves :  0.5630112
propitiation :  0.5614728



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:23:01.735461: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 4 loss: 2.6604623794555664 

Epoch:  4
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:13<01:23, 13.93s/it]

holy : 
dash :  0.5958506
determined :  0.56684345
therein :  0.5628224
curseth :  0.5518258
such :  0.5274934



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:09, 13.88s/it]

father : 
enquired :  0.58724624
arise :  0.5732074
inform :  0.56128293
ararat :  0.55694956
amzi :  0.55148864



 43%|████████████████████████████████████▍                                                | 3/7 [00:41<00:55, 13.93s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:55<00:41, 13.95s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:09<00:27, 13.96s/it]

love : 
demetrius :  0.5910789
haggi :  0.5880025
point :  0.56895816
anethothite :  0.55288374
plow :  0.5526705



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:23<00:13, 13.93s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:37<00:00, 13.94s/it]


day : 
seeing :  0.59384924
teats :  0.5905622
figure :  0.586831
sheaves :  0.56241226
propitiation :  0.5607639



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:24:42.672926: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 5 loss: 10.989775657653809 

Epoch:  5
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:14<01:24, 14.03s/it]

holy : 
dash :  0.5958506
determined :  0.56684345
therein :  0.5610199
curseth :  0.5518258
such :  0.5274934



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:09, 13.91s/it]

father : 
enquired :  0.58724624
arise :  0.5732074
inform :  0.56128293
ararat :  0.55694956
amzi :  0.55148864



 43%|████████████████████████████████████▍                                                | 3/7 [00:41<00:55, 13.96s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:56<00:42, 14.16s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:10<00:28, 14.16s/it]

love : 
demetrius :  0.5901142
haggi :  0.58955586
point :  0.56621283
anethothite :  0.5522915
plow :  0.5503904



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:24<00:14, 14.13s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:38<00:00, 14.06s/it]


day : 
seeing :  0.5909259
figure :  0.58857274
teats :  0.58740646
sheaves :  0.56170315
propitiation :  0.55996084



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:26:24.460889: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 6 loss: 6.0018534660339355 

Epoch:  6
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:14<01:24, 14.07s/it]

holy : 
dash :  0.5958506
determined :  0.56684345
therein :  0.5593169
curseth :  0.5518258
such :  0.5274934



 29%|████████████████████████▎                                                            | 2/7 [00:28<01:10, 14.13s/it]

father : 
enquired :  0.58724624
arise :  0.5732074
inform :  0.56128293
ararat :  0.55694956
amzi :  0.55148864



 43%|████████████████████████████████████▍                                                | 3/7 [00:42<00:57, 14.26s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:56<00:42, 14.10s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:10<00:28, 14.02s/it]

love : 
haggi :  0.59082496
demetrius :  0.5892043
point :  0.5637653
shemida :  0.55344325
anethothite :  0.5517088



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:24<00:13, 13.97s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:38<00:00, 14.04s/it]


day : 
figure :  0.58993924
seeing :  0.58822274
teats :  0.5845016
sheaves :  0.56093895
propitiation :  0.5591151



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:28:06.084313: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 7 loss: 10.364392280578613 

Epoch:  7
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:14<01:24, 14.15s/it]

holy : 
dash :  0.5950135
determined :  0.5684231
therein :  0.5564735
curseth :  0.5508668
such :  0.53156936



 29%|████████████████████████▎                                                            | 2/7 [00:28<01:10, 14.06s/it]

father : 
enquired :  0.58723193
arise :  0.57422346
inform :  0.5616592
ararat :  0.5552557
amzi :  0.55120695



 43%|████████████████████████████████████▍                                                | 3/7 [00:42<00:56, 14.01s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:56<00:41, 13.99s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:10<00:28, 14.04s/it]

love : 
haggi :  0.5918924
demetrius :  0.5896574
point :  0.5617963
shemida :  0.55558705
anethothite :  0.5477153



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:24<00:14, 14.01s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:38<00:00, 14.05s/it]


day : 
figure :  0.5902983
seeing :  0.5845271
teats :  0.5828861
propitiation :  0.55904204
sheaves :  0.5584154



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:29:47.837376: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 8 loss: 7.321661472320557 

Epoch:  8
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:14<01:24, 14.00s/it]

holy : 
dash :  0.5941296
determined :  0.56969726
therein :  0.5536977
curseth :  0.5498846
such :  0.535075



 29%|████████████████████████▎                                                            | 2/7 [00:28<01:10, 14.03s/it]

father : 
enquired :  0.590675
arise :  0.57488567
inform :  0.5638484
ararat :  0.55444795
amzi :  0.55200195



 43%|████████████████████████████████████▍                                                | 3/7 [00:41<00:55, 13.98s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:55<00:41, 13.95s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:09<00:27, 13.92s/it]

love : 
haggi :  0.5927597
demetrius :  0.5899826
point :  0.55998623
shemida :  0.5574094
bell :  0.54625237



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:23<00:13, 13.90s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:37<00:00, 13.92s/it]


day : 
figure :  0.590488
teats :  0.5813371
seeing :  0.58114463
propitiation :  0.5588575
sheaves :  0.5560715



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:31:28.644709: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 9 loss: 4.676178455352783 

Epoch:  9
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:13<01:23, 13.94s/it]

holy : 
dash :  0.5932284
determined :  0.57073104
therein :  0.55102265
curseth :  0.5489037
such :  0.5381112



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:09, 13.89s/it]

father : 
enquired :  0.5936405
arise :  0.5753641
inform :  0.56569636
ararat :  0.553614
amzi :  0.55260384



 43%|████████████████████████████████████▍                                                | 3/7 [00:41<00:55, 13.86s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:55<00:41, 13.86s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:09<00:27, 13.92s/it]

love : 
haggi :  0.59347063
demetrius :  0.5902123
shemida :  0.55897194
point :  0.5583227
bell :  0.54674023



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:23<00:13, 13.90s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:37<00:00, 13.89s/it]


day : 
figure :  0.5905566
teats :  0.57986623
seeing :  0.57804865
propitiation :  0.5585998
nimrim :  0.55641305



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:33:09.333070: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 10 loss: 11.312969207763672 

Epoch:  10
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:13<01:23, 13.84s/it]

holy : 
dash :  0.59233075
determined :  0.5715738
therein :  0.54846907
curseth :  0.5479411
such :  0.5407566



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:09, 13.91s/it]

father : 
enquired :  0.59621
arise :  0.5757016
inform :  0.56726605
amzi :  0.553056
ararat :  0.5527757



 43%|████████████████████████████████████▍                                                | 3/7 [00:41<00:55, 13.86s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:55<00:41, 13.86s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:09<00:27, 13.87s/it]

love : 
haggi :  0.5940576
demetrius :  0.59037024
shemida :  0.5603214
point :  0.5567944
bell :  0.54713196



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:23<00:13, 13.86s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:37<00:00, 13.89s/it]


day : 
figure :  0.59053916
teats :  0.5784793
seeing :  0.57521486
propitiation :  0.558296
nimrim :  0.5568675



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:34:49.889989: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 11 loss: 4.25919246673584 

Epoch:  11
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:13<01:23, 13.88s/it]

holy : 
dash :  0.5914508
determined :  0.57226324
curseth :  0.5470081
therein :  0.5460483
such :  0.5430726



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:09, 13.86s/it]

father : 
enquired :  0.5984476
arise :  0.5759307
inform :  0.5686066
amzi :  0.55339134
ararat :  0.5519484



 43%|████████████████████████████████████▍                                                | 3/7 [00:41<00:55, 13.89s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:55<00:41, 13.87s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:09<00:27, 13.77s/it]

love : 
haggi :  0.5939437
demetrius :  0.5906745
shemida :  0.56100476
point :  0.55559456
bell :  0.5484775



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:22<00:13, 13.74s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:36<00:00, 13.77s/it]


day : 
figure :  0.5904609
teats :  0.57717836
seeing :  0.57262105
propitiation :  0.5579653
nimrim :  0.5572145



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:36:29.619441: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 12 loss: 6.568507194519043 

Epoch:  12
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:13<01:22, 13.76s/it]

holy : 
dash :  0.59059864
determined :  0.572829
curseth :  0.5461124
such :  0.54510885
therein :  0.5437661



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:08, 13.66s/it]

father : 
enquired :  0.60040474
arise :  0.57607657
inform :  0.56975687
amzi :  0.5536355
ararat :  0.55114275



 43%|████████████████████████████████████▍                                                | 3/7 [00:40<00:54, 13.62s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:54<00:40, 13.61s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:08<00:27, 13.60s/it]

love : 
haggi :  0.59378844
demetrius :  0.5908959
shemida :  0.56157017
point :  0.5544653
bell :  0.5496398



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:21<00:13, 13.60s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:35<00:00, 13.63s/it]


day : 
figure :  0.5903404
teats :  0.5759628
seeing :  0.5702469
propitiation :  0.5576211
nimrim :  0.5574782



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:38:08.278687: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 13 loss: 11.073118209838867 

Epoch:  13
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:13<01:21, 13.56s/it]

holy : 
dash :  0.5897808
determined :  0.5732945
such :  0.5469059
curseth :  0.54525876
therein :  0.54162395



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:07, 13.50s/it]

father : 
enquired :  0.6021231
arise :  0.5761585
inform :  0.570748
amzi :  0.55380833
ararat :  0.55036616



 43%|████████████████████████████████████▍                                                | 3/7 [00:40<00:53, 13.48s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:54<00:40, 13.54s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:07<00:27, 13.55s/it]

love : 
haggi :  0.59360564
demetrius :  0.59105307
shemida :  0.5620395
point :  0.5534067
bell :  0.5506487



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:21<00:13, 13.56s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:35<00:00, 13.59s/it]


day : 
figure :  0.5901915
teats :  0.5748307
seeing :  0.56807387
nimrim :  0.5576768
propitiation :  0.5572735



  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]2023-02-06 14:39:46.626884: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
  0%|                                                                                          | 0/9785 [00:00<?, ?it/s]

Epoch 14 loss: 6.283019065856934 

Epoch:  14
Evaluation k-nearest neighbours using cosine similarity



 14%|████████████▏                                                                        | 1/7 [00:13<01:22, 13.80s/it]

holy : 
dash :  0.58900154
determined :  0.5736782
such :  0.54849696
curseth :  0.5444501
therein :  0.53962034



 29%|████████████████████████▎                                                            | 2/7 [00:27<01:08, 13.72s/it]

father : 
enquired :  0.603637
arise :  0.5761918
inform :  0.5716053
amzi :  0.5539255
ararat :  0.5496236



 43%|████████████████████████████████████▍                                                | 3/7 [00:41<00:54, 13.65s/it]

wine : 
ascend :  0.6042837
fir :  0.5993051
thrust :  0.57304835
fellowlabourer :  0.570156
alone :  0.56698203



 57%|████████████████████████████████████████████████▌                                    | 4/7 [00:54<00:40, 13.66s/it]

poison : 
occupy :  0.64646816
samson :  0.59120965
justify :  0.5836405
messenger :  0.57460153
curseth :  0.5726578



 71%|████████████████████████████████████████████████████████████▋                        | 5/7 [01:08<00:27, 13.64s/it]

love : 
haggi :  0.59340554
demetrius :  0.5911603
shemida :  0.5624307
point :  0.5524177
bell :  0.5515279



 86%|████████████████████████████████████████████████████████████████████████▊            | 6/7 [01:22<00:13, 13.67s/it]

strong : 
trench :  0.60901034
enquired :  0.59488213
discouraged :  0.5744839
testified :  0.5707761
shekels :  0.5499943



100%|█████████████████████████████████████████████████████████████████████████████████████| 7/7 [01:35<00:00, 13.66s/it]

day : 
figure :  0.5891353
teats :  0.57288754
seeing :  0.56620824
propitiation :  0.56146395
nimrim :  0.55860883






In [2]:
%load_ext tensorboard
%tensorboard --logdir="logs/{config}"