In [1]:
import pickle
import numpy as np
import pandas as pd
import torch
import datasets
from nltk.util import ngrams
import os
from tqdm import tqdm

In [2]:
with open("lemmatized_tweets.pkl","rb") as file_handle:
    data = pickle.load(file_handle)

In [3]:
data = data.apply(lambda x: " ".join(x)+"\n")

In [4]:
converted_raw_text = list(data)

In [5]:
truncated_raw_text = list(filter(lambda x: len(x) > 1,converted_raw_text))

In [6]:
with open("truncated-cleaned-tweets.txt","wt") as file_handle:

    file_handle.writelines(truncated_raw_text[:100000])

In [7]:
dset = datasets.load_dataset("text",data_files={"train":"truncated-cleaned-tweets.txt"})

Generating train split: 0 examples [00:00, ? examples/s]

In [8]:
def tokenize_tweets(single_row):

    single_row["tokenized-tweets"] = single_row["text"].split()
    return single_row

In [9]:
dset["train"] = dset["train"].map(tokenize_tweets)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [10]:
def convert_to_trigrams(single_row):

    single_row["tri-grams"] = list(ngrams(single_row["tokenized-tweets"],n=3))
    return single_row

In [11]:
dset["train"] = dset["train"].map(convert_to_trigrams)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [12]:
vocabulary = set()

for raw_text in truncated_raw_text[:100000]:
    vocabulary.update(raw_text.split())

In [13]:
vocab2idx = dict(zip(vocabulary,range(len(vocabulary))))

In [14]:
def convert_to_bigrams(single_row):

    center_token_target_token_pairs = list()

    for single_trigram in single_row["tri-grams"]:

        bigrams = list()
        
        bigrams.append([vocab2idx[single_trigram[1]],
                                                vocab2idx[single_trigram[0]]])
        bigrams.append([vocab2idx[single_trigram[1]],
                                                vocab2idx[single_trigram[2]]])
        center_token_target_token_pairs.append(bigrams)

    single_row["tri-grams"] = center_token_target_token_pairs

    return single_row

In [15]:
dset["train"] = dset["train"].map(convert_to_bigrams)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [16]:
input_token_target_token_pairs = list()

for single_tweet_bigrams in dset["train"]["tri-grams"]:
    for bigrams_list in single_tweet_bigrams:

        input_token_target_token_pairs.append(bigrams_list[0])
        input_token_target_token_pairs.append(bigrams_list[1])

In [17]:
class SkipGramDataset(torch.utils.data.Dataset):

    def __init__(self,input_target_pairs):
        self.data = input_target_pairs

    def __getitem__(self,index):
        return self.data[index]
    
    def __len__(self):
        return len(self.data)

In [18]:
training_data_obj = SkipGramDataset(input_token_target_token_pairs)

In [19]:
training_data_generator = torch.utils.data.DataLoader(training_data_obj,batch_size=32,
                                                     num_workers=os.cpu_count())

In [20]:
class Word2VecSkipGramNeuralNetwork(torch.nn.Module):

    def __init__(self,vocabulary_size,topic_vector_dim):
        super().__init__()

        self.hidden_layer = torch.nn.Embedding(num_embeddings=vocabulary_size,
                                               embedding_dim=topic_vector_dim)
        self.output_layer = torch.nn.Linear(in_features=topic_vector_dim,
                                            out_features=vocabulary_size)
        self.output_layer_activation = torch.nn.Softmax()

    
    def forward(self,center_token):

        embedding_layer_out = self.hidden_layer(center_token)
        linear_layer_out = self.output_layer(embedding_layer_out)
        nn_out = self.output_layer_activation(linear_layer_out)

        return nn_out

In [21]:
our_word2vec_skip_gram_nw = Word2VecSkipGramNeuralNetwork(len(vocab2idx),64)

In [22]:
our_word2vec_skip_gram_nw.to("cpu")

Word2VecSkipGramNeuralNetwork(
  (hidden_layer): Embedding(43953, 64)
  (output_layer): Linear(in_features=64, out_features=43953, bias=True)
  (output_layer_activation): Softmax(dim=None)
)

In [23]:
for mini_batch_idx, mini_batch in enumerate(training_data_generator):

    print("Index of Mini Batch is {}".format(mini_batch_idx))
    print("Center Token Mini Batch is {}".format(mini_batch[0]))
    print("Surrounding Token Mini Batch is {}".format(mini_batch[1]))
    break

Index of Mini Batch is 0
Center Token Mini Batch is tensor([19386, 19386,  7106,  7106, 28449, 28449, 22380, 22380, 31842, 31842,
        17423, 17423, 10753, 10753, 27082, 27082,  7421,  7421, 26052, 26052,
        30065, 30065, 37421, 37421, 27090, 27090, 34210, 34210, 19270, 19270,
        14919, 14919])
Surrounding Token Mini Batch is tensor([32552,  7106, 19386, 28449,  7106, 22380, 28449, 31842, 22380, 17423,
        31842, 10753, 17423, 27082, 10753, 30791, 39198, 26052,  7421, 30065,
        26052, 37421, 30065, 27090, 37421, 34210, 27090, 19270, 34210, 22878,
        17696, 19542])


In [24]:
epochs = 5
optimizer = torch.optim.Adam(params=our_word2vec_skip_gram_nw.parameters(),
                             lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()
progress_bar = tqdm(range(epochs * len(training_data_generator)))

for epoch in range(epochs):

    for mini_batch_idx, mini_batch in enumerate(training_data_generator):

        center_token_mini_batch = mini_batch[0]
        surrounding_token_mini_batch = mini_batch[1]

        center_token_mini_batch.to("cpu")
        surrounding_token_mini_batch.to("cpu")

        optimizer.zero_grad()

        if (mini_batch_idx+1) % 1000 == 0:
            print("Epoch # {}, Time Step # {}, Loss = {}".format(epoch,(mini_batch_idx+1),
                                                             loss_fn_value))

        y_pred = our_word2vec_skip_gram_nw(center_token_mini_batch)

        loss_fn_value = loss_fn(y_pred,surrounding_token_mini_batch)
        loss_fn_value.backward()

        optimizer.step()
        progress_bar.update(1)

  return self._call_impl(*args, **kwargs)
  1%|          | 1001/124375 [01:01<2:05:46, 16.35it/s]

Epoch # 0, Time Step # 1000, Loss = 10.690884590148926


  2%|▏         | 2001/124375 [02:02<2:03:05, 16.57it/s]

Epoch # 0, Time Step # 2000, Loss = 10.658848762512207


  2%|▏         | 3001/124375 [03:03<2:06:10, 16.03it/s]

Epoch # 0, Time Step # 3000, Loss = 10.659706115722656


  3%|▎         | 4001/124375 [04:04<2:00:03, 16.71it/s]

Epoch # 0, Time Step # 4000, Loss = 10.659699440002441


  4%|▍         | 5001/124375 [05:04<1:58:12, 16.83it/s]

Epoch # 0, Time Step # 5000, Loss = 10.65968132019043


  5%|▍         | 6001/124375 [06:06<2:01:24, 16.25it/s]

Epoch # 0, Time Step # 6000, Loss = 10.659707069396973


  6%|▌         | 7001/124375 [07:06<1:57:39, 16.63it/s]

Epoch # 0, Time Step # 7000, Loss = 10.659688949584961


  6%|▋         | 8001/124375 [09:22<1:54:36, 16.92it/s]  

Epoch # 0, Time Step # 8000, Loss = 10.659307479858398


  7%|▋         | 9001/124375 [10:25<1:59:49, 16.05it/s]

Epoch # 0, Time Step # 9000, Loss = 10.646452903747559


  8%|▊         | 10001/124375 [11:27<1:58:09, 16.13it/s]

Epoch # 0, Time Step # 10000, Loss = 10.69095230102539


  9%|▉         | 11001/124375 [12:29<1:56:54, 16.16it/s]

Epoch # 0, Time Step # 11000, Loss = 10.659688949584961


 10%|▉         | 12001/124375 [13:30<1:53:49, 16.45it/s]

Epoch # 0, Time Step # 12000, Loss = 10.628466606140137


 10%|█         | 13001/124375 [14:32<1:49:52, 16.89it/s]

Epoch # 0, Time Step # 13000, Loss = 10.628433227539062


 11%|█▏        | 14001/124375 [15:33<1:53:19, 16.23it/s]

Epoch # 0, Time Step # 14000, Loss = 10.690953254699707


 12%|█▏        | 15001/124375 [16:33<1:47:24, 16.97it/s]

Epoch # 0, Time Step # 15000, Loss = 10.690937995910645


 13%|█▎        | 16001/124375 [17:33<1:50:08, 16.40it/s]

Epoch # 0, Time Step # 16000, Loss = 10.690890312194824


 14%|█▎        | 17001/124375 [18:34<1:45:59, 16.88it/s]

Epoch # 0, Time Step # 17000, Loss = 10.659684181213379


 14%|█▍        | 18001/124375 [19:34<1:45:11, 16.85it/s]

Epoch # 0, Time Step # 18000, Loss = 10.628458976745605


 15%|█▌        | 19001/124375 [20:34<1:44:16, 16.84it/s]

Epoch # 0, Time Step # 19000, Loss = 10.690940856933594


 16%|█▌        | 20001/124375 [21:34<1:42:13, 17.02it/s]

Epoch # 0, Time Step # 20000, Loss = 10.690949440002441


 17%|█▋        | 21001/124375 [22:34<1:41:20, 17.00it/s]

Epoch # 0, Time Step # 21000, Loss = 10.628458023071289


 18%|█▊        | 22001/124375 [23:34<1:40:15, 17.02it/s]

Epoch # 0, Time Step # 22000, Loss = 10.659574508666992


 18%|█▊        | 23001/124375 [24:34<1:41:14, 16.69it/s]

Epoch # 0, Time Step # 23000, Loss = 10.659696578979492


 19%|█▉        | 24001/124375 [25:34<1:44:00, 16.08it/s]

Epoch # 0, Time Step # 24000, Loss = 10.690946578979492


 21%|██        | 25877/124375 [27:27<1:36:51, 16.95it/s]

Epoch # 1, Time Step # 1000, Loss = 10.690953254699707


 22%|██▏       | 26877/124375 [28:28<1:37:14, 16.71it/s]

Epoch # 1, Time Step # 2000, Loss = 10.625658988952637


 22%|██▏       | 27877/124375 [29:28<1:37:18, 16.53it/s]

Epoch # 1, Time Step # 3000, Loss = 10.628453254699707


 23%|██▎       | 28877/124375 [30:28<1:33:07, 17.09it/s]

Epoch # 1, Time Step # 4000, Loss = 10.659699440002441


 24%|██▍       | 29877/124375 [31:28<1:32:53, 16.96it/s]

Epoch # 1, Time Step # 5000, Loss = 10.628447532653809


 25%|██▍       | 30877/124375 [32:28<1:35:21, 16.34it/s]

Epoch # 1, Time Step # 6000, Loss = 10.659708976745605


 26%|██▌       | 31877/124375 [33:28<1:37:32, 15.80it/s]

Epoch # 1, Time Step # 7000, Loss = 10.628457069396973


 26%|██▋       | 32877/124375 [34:28<1:31:11, 16.72it/s]

Epoch # 1, Time Step # 8000, Loss = 10.59720516204834


 27%|██▋       | 33877/124375 [35:28<1:28:08, 17.11it/s]

Epoch # 1, Time Step # 9000, Loss = 10.59720516204834


 28%|██▊       | 34877/124375 [36:28<1:27:06, 17.12it/s]

Epoch # 1, Time Step # 10000, Loss = 10.690958976745605


 29%|██▉       | 35877/124375 [37:29<1:28:38, 16.64it/s]

Epoch # 1, Time Step # 11000, Loss = 10.659667015075684


 30%|██▉       | 36877/124375 [38:30<1:28:13, 16.53it/s]

Epoch # 1, Time Step # 12000, Loss = 10.628457069396973


 30%|███       | 37877/124375 [39:31<1:24:52, 16.98it/s]

Epoch # 1, Time Step # 13000, Loss = 10.62845516204834


 31%|███▏      | 38877/124375 [40:30<1:26:45, 16.43it/s]

Epoch # 1, Time Step # 14000, Loss = 10.690958976745605


 32%|███▏      | 39877/124375 [41:31<1:24:42, 16.63it/s]

Epoch # 1, Time Step # 15000, Loss = 10.690954208374023


 33%|███▎      | 40877/124375 [42:31<1:28:13, 15.77it/s]

Epoch # 1, Time Step # 16000, Loss = 10.628451347351074


 34%|███▎      | 41877/124375 [43:32<1:22:56, 16.58it/s]

Epoch # 1, Time Step # 17000, Loss = 10.659709930419922


 34%|███▍      | 42877/124375 [44:33<1:18:50, 17.23it/s]

Epoch # 1, Time Step # 18000, Loss = 10.628451347351074


 35%|███▌      | 43877/124375 [45:34<1:22:46, 16.21it/s]

Epoch # 1, Time Step # 19000, Loss = 10.69095516204834


 36%|███▌      | 44877/124375 [46:34<1:22:27, 16.07it/s]

Epoch # 1, Time Step # 20000, Loss = 10.690949440002441


 37%|███▋      | 45875/124375 [47:35<1:30:57, 14.38it/s]

Epoch # 1, Time Step # 21000, Loss = 10.628458023071289


 38%|███▊      | 46877/124375 [48:36<1:17:24, 16.68it/s]

Epoch # 1, Time Step # 22000, Loss = 10.597207069396973


 38%|███▊      | 47877/124375 [49:37<1:16:48, 16.60it/s]

Epoch # 1, Time Step # 23000, Loss = 10.659708023071289


 39%|███▉      | 48877/124375 [50:38<1:15:46, 16.60it/s]

Epoch # 1, Time Step # 24000, Loss = 10.690953254699707


 41%|████      | 50751/124375 [52:27<1:09:45, 17.59it/s]

Epoch # 2, Time Step # 1000, Loss = 10.690958976745605


 42%|████▏     | 51751/124375 [53:24<1:08:32, 17.66it/s]

Epoch # 2, Time Step # 2000, Loss = 10.597207069396973


 42%|████▏     | 52751/124375 [54:22<1:05:38, 18.18it/s]

Epoch # 2, Time Step # 3000, Loss = 10.659708023071289


 43%|████▎     | 53751/124375 [55:18<1:03:02, 18.67it/s]

Epoch # 2, Time Step # 4000, Loss = 10.659708976745605


 44%|████▍     | 54751/124375 [56:14<1:05:31, 17.71it/s]

Epoch # 2, Time Step # 5000, Loss = 10.628457069396973


 45%|████▍     | 55751/124375 [57:10<1:03:50, 17.91it/s]

Epoch # 2, Time Step # 6000, Loss = 10.690958976745605


 46%|████▌     | 56751/124375 [58:06<1:02:42, 17.97it/s]

Epoch # 2, Time Step # 7000, Loss = 10.62845516204834


 46%|████▋     | 57751/124375 [59:01<1:03:18, 17.54it/s]

Epoch # 2, Time Step # 8000, Loss = 10.597208023071289


 47%|████▋     | 58751/124375 [59:57<1:03:18, 17.27it/s]

Epoch # 2, Time Step # 9000, Loss = 10.597204208374023


 48%|████▊     | 59751/124375 [1:00:53<1:01:41, 17.46it/s]

Epoch # 2, Time Step # 10000, Loss = 10.690958976745605


 49%|████▉     | 60751/124375 [1:01:49<56:57, 18.61it/s]  

Epoch # 2, Time Step # 11000, Loss = 10.659689903259277


 50%|████▉     | 61751/124375 [1:02:44<58:25, 17.86it/s]  

Epoch # 2, Time Step # 12000, Loss = 10.62845516204834


 50%|█████     | 62751/124375 [1:03:40<58:03, 17.69it/s]  

Epoch # 2, Time Step # 13000, Loss = 10.659697532653809


 51%|█████▏    | 63752/124375 [1:04:39<54:21, 18.59it/s]  

Epoch # 2, Time Step # 14000, Loss = 10.690958976745605


 52%|█████▏    | 64752/124375 [1:05:37<56:02, 17.73it/s]  

Epoch # 2, Time Step # 15000, Loss = 10.690958976745605


 53%|█████▎    | 65752/124375 [1:06:36<1:00:31, 16.14it/s]

Epoch # 2, Time Step # 16000, Loss = 10.62845516204834


 54%|█████▎    | 66751/124375 [1:11:19<53:19, 18.01it/s]    

Epoch # 2, Time Step # 17000, Loss = 10.659708023071289


 54%|█████▍    | 67751/124375 [1:12:17<54:07, 17.43it/s]  

Epoch # 2, Time Step # 18000, Loss = 10.62845230102539


 55%|█████▌    | 68751/124375 [1:13:17<59:11, 15.66it/s]  

Epoch # 2, Time Step # 19000, Loss = 10.690951347351074


 56%|█████▌    | 69751/124375 [1:14:21<53:50, 16.91it/s]  

Epoch # 2, Time Step # 20000, Loss = 10.690957069396973


 57%|█████▋    | 70751/124375 [1:15:25<1:00:41, 14.72it/s]

Epoch # 2, Time Step # 21000, Loss = 10.628458023071289


 58%|█████▊    | 71751/124375 [1:16:28<54:02, 16.23it/s]  

Epoch # 2, Time Step # 22000, Loss = 10.597207069396973


 58%|█████▊    | 72751/124375 [1:17:30<47:28, 18.12it/s]  

Epoch # 2, Time Step # 23000, Loss = 10.659693717956543


 59%|█████▉    | 73751/124375 [1:18:26<44:06, 19.13it/s]  

Epoch # 2, Time Step # 24000, Loss = 10.690949440002441


 61%|██████    | 75626/124375 [1:20:17<49:13, 16.51it/s]  

Epoch # 3, Time Step # 1000, Loss = 10.690958976745605


 62%|██████▏   | 76627/124375 [1:21:16<47:48, 16.64it/s]

Epoch # 3, Time Step # 2000, Loss = 10.597207069396973


 62%|██████▏   | 77626/124375 [1:22:16<46:29, 16.76it/s]

Epoch # 3, Time Step # 3000, Loss = 10.659698486328125


 63%|██████▎   | 78627/124375 [1:23:15<45:46, 16.66it/s]

Epoch # 3, Time Step # 4000, Loss = 10.659708976745605


 64%|██████▍   | 79625/124375 [1:24:15<44:05, 16.91it/s]

Epoch # 3, Time Step # 5000, Loss = 10.628445625305176


 65%|██████▍   | 80627/124375 [1:25:27<46:43, 15.60it/s]  

Epoch # 3, Time Step # 6000, Loss = 10.690957069396973


 66%|██████▌   | 81627/124375 [1:26:28<43:00, 16.57it/s]  

Epoch # 3, Time Step # 7000, Loss = 10.628454208374023


 66%|██████▋   | 82626/124375 [1:27:28<40:02, 17.38it/s]

Epoch # 3, Time Step # 8000, Loss = 10.597207069396973


 67%|██████▋   | 83626/124375 [1:28:28<44:18, 15.33it/s]

Epoch # 3, Time Step # 9000, Loss = 10.597203254699707


 68%|██████▊   | 84626/124375 [1:29:28<40:06, 16.52it/s]

Epoch # 3, Time Step # 10000, Loss = 10.69095516204834


 69%|██████▉   | 85627/124375 [1:30:27<40:52, 15.80it/s]

Epoch # 3, Time Step # 11000, Loss = 10.659703254699707


 70%|██████▉   | 86627/124375 [1:31:27<38:05, 16.51it/s]

Epoch # 3, Time Step # 12000, Loss = 10.628457069396973


 70%|███████   | 87626/124375 [1:32:26<36:55, 16.59it/s]

Epoch # 3, Time Step # 13000, Loss = 10.628451347351074


 71%|███████▏  | 88627/124375 [1:33:26<35:39, 16.71it/s]

Epoch # 3, Time Step # 14000, Loss = 10.690958976745605


 72%|███████▏  | 89627/124375 [1:34:26<36:09, 16.02it/s]

Epoch # 3, Time Step # 15000, Loss = 10.690957069396973


 73%|███████▎  | 90626/124375 [1:35:26<34:13, 16.43it/s]

Epoch # 3, Time Step # 16000, Loss = 10.628450393676758


 74%|███████▎  | 91624/124375 [1:36:25<17:44, 30.76it/s]

Epoch # 3, Time Step # 17000, Loss = 10.659703254699707


 74%|███████▍  | 92627/124375 [1:37:25<31:36, 16.74it/s]

Epoch # 3, Time Step # 18000, Loss = 10.628458023071289


 75%|███████▌  | 93627/124375 [1:38:25<30:36, 16.75it/s]

Epoch # 3, Time Step # 19000, Loss = 10.690958976745605


 76%|███████▌  | 94626/124375 [1:39:24<29:55, 16.57it/s]

Epoch # 3, Time Step # 20000, Loss = 10.690958976745605


 77%|███████▋  | 95626/124375 [1:40:23<28:39, 16.72it/s]

Epoch # 3, Time Step # 21000, Loss = 10.628458023071289


 78%|███████▊  | 96626/124375 [1:41:21<28:04, 16.47it/s]

Epoch # 3, Time Step # 22000, Loss = 10.597207069396973


 78%|███████▊  | 97626/124375 [1:42:19<26:16, 16.96it/s]

Epoch # 3, Time Step # 23000, Loss = 10.659703254699707


 79%|███████▉  | 98626/124375 [1:43:18<25:31, 16.81it/s]

Epoch # 3, Time Step # 24000, Loss = 10.690949440002441


 81%|████████  | 100501/124375 [1:45:11<25:47, 15.43it/s]

Epoch # 4, Time Step # 1000, Loss = 10.690957069396973


 82%|████████▏ | 101501/124375 [1:46:11<22:46, 16.75it/s]

Epoch # 4, Time Step # 2000, Loss = 10.597207069396973


 82%|████████▏ | 102501/124375 [1:47:13<23:49, 15.30it/s]

Epoch # 4, Time Step # 3000, Loss = 10.659703254699707


 83%|████████▎ | 103501/124375 [1:48:13<20:50, 16.70it/s]

Epoch # 4, Time Step # 4000, Loss = 10.659708976745605


 84%|████████▍ | 104502/124375 [1:49:13<19:34, 16.92it/s]

Epoch # 4, Time Step # 5000, Loss = 10.59720516204834


 85%|████████▍ | 105501/124375 [1:50:13<18:57, 16.59it/s]

Epoch # 4, Time Step # 6000, Loss = 10.69095516204834


 86%|████████▌ | 106501/124375 [1:51:12<17:56, 16.60it/s]

Epoch # 4, Time Step # 7000, Loss = 10.628453254699707


 86%|████████▋ | 107501/124375 [1:52:13<16:56, 16.60it/s]

Epoch # 4, Time Step # 8000, Loss = 10.597207069396973


 87%|████████▋ | 108501/124375 [1:53:12<16:12, 16.33it/s]

Epoch # 4, Time Step # 9000, Loss = 10.597203254699707


 88%|████████▊ | 109501/124375 [1:54:11<15:16, 16.22it/s]

Epoch # 4, Time Step # 10000, Loss = 10.690958976745605


 89%|████████▉ | 110502/124375 [1:55:11<13:48, 16.74it/s]

Epoch # 4, Time Step # 11000, Loss = 10.659699440002441


 90%|████████▉ | 111502/124375 [1:56:10<12:54, 16.62it/s]

Epoch # 4, Time Step # 12000, Loss = 10.62845516204834


 90%|█████████ | 112501/124375 [1:57:09<11:49, 16.73it/s]

Epoch # 4, Time Step # 13000, Loss = 10.628453254699707


 91%|█████████▏| 113502/124375 [1:58:09<10:56, 16.57it/s]

Epoch # 4, Time Step # 14000, Loss = 10.690958976745605


 92%|█████████▏| 114502/124375 [1:59:09<10:12, 16.12it/s]

Epoch # 4, Time Step # 15000, Loss = 10.690958976745605


 93%|█████████▎| 115502/124375 [2:00:09<08:58, 16.48it/s]

Epoch # 4, Time Step # 16000, Loss = 10.628453254699707


 94%|█████████▎| 116500/124375 [2:01:10<08:07, 16.14it/s]

Epoch # 4, Time Step # 17000, Loss = 10.659708023071289


 94%|█████████▍| 117501/124375 [2:02:10<06:50, 16.73it/s]

Epoch # 4, Time Step # 18000, Loss = 10.628458023071289


 95%|█████████▌| 118501/124375 [2:03:10<05:59, 16.34it/s]

Epoch # 4, Time Step # 19000, Loss = 10.69095516204834


 96%|█████████▌| 119502/124375 [2:04:10<05:24, 15.04it/s]

Epoch # 4, Time Step # 20000, Loss = 10.690953254699707


 97%|█████████▋| 120501/124375 [2:05:11<03:56, 16.38it/s]

Epoch # 4, Time Step # 21000, Loss = 10.628458023071289


 98%|█████████▊| 121501/124375 [2:06:10<03:01, 15.84it/s]

Epoch # 4, Time Step # 22000, Loss = 10.597207069396973


 98%|█████████▊| 122501/124375 [2:07:09<01:52, 16.65it/s]

Epoch # 4, Time Step # 23000, Loss = 10.65970516204834


 99%|█████████▉| 123502/124375 [2:08:09<00:54, 15.92it/s]

Epoch # 4, Time Step # 24000, Loss = 10.690957069396973


100%|█████████▉| 124374/124375 [2:09:01<00:00, 16.47it/s]

100%|██████████| 124375/124375 [2:09:14<00:00, 16.47it/s]