In [1]:
# The story telling algorithm with distributed similarity mechanism and diffusion model for all documents in the dataset
# Author: Alireza Nouri
# email: apashamoham@miners.utep.edu
# date 12/16/2023

In [1]:
#import packages
from transformers import BertTokenizer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity
from sklearn.manifold import TSNE

import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset

import matplotlib.pyplot as plt

import sys
sys.path.append('..')
from dataset_reader import dataset_loader
from contextual_embedding.contextual_embedding import bert_embedding

import seaborn as sns
import numpy as np
import pickle
import pandas as pd
from tqdm import tqdm
import copy
import random 

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import tensorflow as tf
from tensorflow.keras import layers, Model
import nltk

2023-12-16 11:48:41.434249: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-16 11:48:41.434280: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-16 11:48:41.434723: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-16 11:48:41.484158: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [3]:
# Load dataset
#read csv file
path = '../../../Dataset/NYTimes/AHNAF_NYTimes/2022.csv'
df = pd.read_csv(path)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# print(df.columns)
# print(df.shape)
df = df[df['section_name'].isin(['U.S.', 'World'])]
#select the columns
df_selected = df[['pub_date', 'abstract', 'headline', 'article', 'keywords', 'section_name', 'subsection_name', 'web_url']]

# drop none values elements
# print(df_selected.shape)
# df_selected.dropna(axis=0, how='any')
# print(df_selected.shape)

def cleaning_text(text):
    return str(text).replace('##', ' ')

#convert dataframe into a list of dictionaries, each one has all information and id
original_data = []
columns = df_selected.columns

for ind, row in df_selected.iterrows():
    try:
        temp_dict = {}
        temp_dict['id'] = ind

        for el in columns:
            temp_dict[el] = row[el]
        temp_dict['article'] = cleaning_text(temp_dict['article'])
        original_data.append(temp_dict)
    except:
        continue
    


In [4]:

# nltk.download('stopwords')
stopwords = nltk.corpus.stopwords.words('english')

In [5]:
from copy import deepcopy, copy
import warnings
warnings.filterwarnings("ignore")

MAX_TOKENS_LENGTH = 400
MIN_TOKENS_LENGTH = 100

In [8]:
# pick random samples from dataset
n_samples = len(original_data)
original_data = random.sample(original_data, n_samples)

# eliminate data with large size of text

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

dataset = []
for data in tqdm(original_data):
    try:
        if data['article'] != 'nan':
            z = tokenizer.tokenize(data['article'])
            if (len(z) > MIN_TOKENS_LENGTH) and (len(z) < MAX_TOKENS_LENGTH):
                dataset.append(data)
    except:
        continue
        
# pickle.dump(dataset, open('25_dataset.p', 'wb'))
# dataset = pickle.load(open('25_dataset.p', 'rb'))

# generate document embedding by BERT
context_em = bert_embedding()
contextual_bert_embedding = []
for doc in tqdm(dataset):
    contextual_bert_embedding.append([doc['id'], context_em.embedding_generator(doc['article'], embedding_type='summing')[1][0][1]])

contextual_doc_embedding =  deepcopy(contextual_bert_embedding)   
# contextual_doc_embedding[id, bert_cls_embedding]    


100%|██████████| 10656/10656 [00:21<00:00, 485.65it/s]
100%|██████████| 8338/8338 [09:59<00:00, 13.91it/s]


In [None]:
# pickle.dump(contextual_doc_embedding, open('25_contextual_doc_embedding.p', 'wb'))
# pickle.dump(dataset, open('25_dataset.p', 'wb'))
# contextual_doc_embedding = pickle.load(open('25_contextual_doc_embedding.p', 'rb'))
# dataset = pickle.load(open('25_dataset.p', 'rb'))

In [8]:
def resize_embedding_vector(embedding_matrix, input_dim, output_dim, original_embedding_matrix=contextual_doc_embedding, epochs_n=32):
    compressed_dim = output_dim
    # Encoder model
    input_vec = layers.Input(shape=(input_dim,))
    encoded = layers.Dense(compressed_dim, activation='relu')(input_vec)
    
    # Decoder model
    decoded = layers.Dense(input_dim, activation='sigmoid')(encoded)
    
    # Autoencoder
    autoencoder = Model(input_vec, decoded)
    
    # Encoder - for compressing the input
    encoder = Model(input_vec, encoded)
    
    # Decoder - for reconstructing the input from compressed form
    encoded_input = layers.Input(shape=(compressed_dim,))
    decoder_layer = autoencoder.layers[-1]
    decoder = Model(encoded_input, decoder_layer(encoded_input))
    
    # Compile the model
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    
    # # Model summary
    # autoencoder.summary()
    
    # Train the model
    autoencoder.fit(embedding_matrix, embedding_matrix, epochs=epochs_n, batch_size=64, shuffle=True)
    temp = encoder.predict(embedding_matrix)
    temp_dict = dict()
    for id, vec in zip(original_embedding_matrix, temp):
        temp_dict[id[0]] = vec
        
    return temp_dict

In [7]:
# pickle.dump(contextual_doc_embedding, open('25_contextual_doc_embedding.p', 'wb'))
# pickle.dump(dataset, open('25_dataset.p', 'wb'))
# contextual_doc_embedding = pickle.load(open('25_contextual_doc_embedding.p', 'rb'))
# dataset = pickle.load(open('25_dataset.p', 'rb'))

In [None]:
# # print(type(contextual_doc_embedding))
# # print(type(contextual_bert_embedding))
# # print(type(contextual_doc_embedding[21517]))
# # print(type(contextual_bert_embedding[0][1]))
# # new_context = []
# # for k in contextual_doc_embedding:
# #     new_context.append([k, torch.tensor(contextual_doc_embedding[k])])
# contextual_doc_embedding = new_context

In [None]:
title_id_dict = dict()
for data in dataset:
    title_id_dict[data['id']] = data['headline']

abstract_id_dict = dict()
for data in dataset:
    abstract_id_dict[data['id']] = data['abstract']

text_id_dict = dict()
for data in dataset:
    text_id_dict[data['id']] = data['article']

date_id_dict = dict()
for data in dataset:
    date_id_dict[data['id']] = data['pub_date']

In [None]:
# pickle.dump(title_id_dict, open('25_title_id_dict.p', 'wb'))
# pickle.dump(abstract_id_dict, open('25_abstract_id_dict.p', 'wb'))
# pickle.dump(text_id_dict, open('25_text_id_dict.p', 'wb'))
# pickle.dump(date_id_dict, open('25_date_id_dict.p', 'wb'))
# pickle.dump(tf_idf_dict, open('25_tf_idf_dict.p', 'wb'))
# pickle.dump(data_timestamps, open('25_data_timestamps.p', 'wb'))
# pickle.dump(contextual_doc_embedding, open('25_contextual_doc_embedding.p', 'wb'))
# pickle.dump(contextual_bert_embedding, open('25_contextual_bert_embedding.p', 'wb'))
# pickle.dump(contextual_dict, open('25_contextual_dict.p','wb'))


In [10]:
####################Reduce the size of number of documnets
embedding_col = [w[1].tolist() for w in contextual_doc_embedding]
contextual_doc_embedding = resize_embedding_vector(embedding_col, len(embedding_col[0]), 128, contextual_doc_embedding)
new_context = []
for k in contextual_doc_embedding:
    new_context.append([k, torch.tensor(contextual_doc_embedding[k])])
contextual_doc_embedding = new_context

contextual_dict = dict()
for data_ in contextual_doc_embedding:
    contextual_dict[data_[0]] = data_[1]

2023-12-16 11:49:34.430108: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-16 11:49:34.430390: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-16 11:49:34.443962: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Epoch 1/32


2023-12-16 11:49:56.210512: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fb7d42795d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-12-16 11:49:56.210530: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA RTX A6000, Compute Capability 8.6
2023-12-16 11:49:56.210535: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (1): NVIDIA RTX A6000, Compute Capability 8.6
2023-12-16 11:49:56.215588: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-12-16 11:49:56.250595: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8700
2023-12-16 11:49:56.318231: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


In [11]:
# pickle.dump(contextual_doc_embedding, open('25_contextual_doc_embedding.p', 'wb'))
# pickle.dump(dataset, open('25_dataset.p', 'wb'))

In [11]:
# Create the TF-IDF vectorizer
vectorizer_tfidf = TfidfVectorizer(stop_words=stopwords)

# Fit the vectorizer to the documents and transform the documents into their TF-IDF representation
tfidf_matrix = vectorizer_tfidf.fit_transform([w['article'] for w in dataset])
list_of_words_in_tfidf = vectorizer_tfidf.get_feature_names_out()
words_list_tfidf = tfidf_matrix.toarray()

tf_idf_dict = dict()
for data, id in zip(words_list_tfidf, dataset):
    tf_idf_dict[id['id']] = data

In [16]:
# in this section, a model uses document embedding to predict the tf-idf
docemb_tfidf = []
for emb, tf_idf in zip(contextual_doc_embedding, words_list_tfidf):
    docemb_tfidf.append([emb[1], tf_idf])

embeddings_size = len(docemb_tfidf[0][0])
X_embeddings = np.random.rand(len(contextual_doc_embedding), embeddings_size)
X_tfidf = np.random.rand(len(contextual_doc_embedding), len(docemb_tfidf[0][1]))
for i in range(len(X_embeddings)):
    X_embeddings[i] = docemb_tfidf[i][0]

for i in range(len(X_tfidf)):
    X_tfidf[i] = docemb_tfidf[i][1]

# Neural Network
model_tfidf = Sequential([
    Dense(2*embeddings_size, activation='relu', input_shape=(embeddings_size,)),
    # Dense(len(docemb_tfidf[0][1])/4, activation='relu'),
    Dense(len(docemb_tfidf[0][1]), activation='linear')  # Output size is vocabulary size
])

model_tfidf.compile(optimizer='adam', loss='mse')
model_tfidf.summary()

# Train the model
model_tfidf.fit(X_embeddings[:1000], X_tfidf[:1000], epochs=50, batch_size=512)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 256)               33024     
                                                                 
 dense_3 (Dense)             (None, 39808)             10230656  
                                                                 
Total params: 10263680 (39.15 MB)
Trainable params: 10263680 (39.15 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50

<keras.src.callbacks.History at 0x7f9fa164bdf0>

In [13]:
# collect all timestamps
all_timestamps = set()
for i in dataset:
    all_timestamps.add(i['pub_date'][5:])
sorted_ts = sorted(list(all_timestamps))

timestamp_width = 3
ts_groups = [sorted_ts[timestamp_width*i:timestamp_width*i+3] for i in range(int(len(sorted_ts)/timestamp_width))]
ts_dict = dict()
for i in range(len(ts_groups)):
    ts_dict[i] = ts_groups[i]
    
data_timestamps = dict()
for w in ts_dict.keys():
    data_timestamps[w] = []
    
for d, emb, tf_ in zip(dataset, contextual_doc_embedding, words_list_tfidf):
    for ke, val in ts_dict.items():
        if d['pub_date'][5:] in val:
            data_timestamps[ke].append([d['id'], d['pub_date'][5:], d['article'], emb[1], tf_])

In [14]:
centroids_ts_group = [] # centroids in each timestamp
emb_and_centroids = []

for i in tqdm(data_timestamps):
    emb_groups = [w[3].tolist() for w in data_timestamps[i]]
    N_CLUSTERS = int(len(emb_groups)/3) # this is hardcoding the number of clusters. later on, it must changed to dynamic approaches
    kmeans = KMeans(n_clusters=N_CLUSTERS)
    kmeans.fit(emb_groups)
    labels = kmeans.labels_
    centroids = kmeans.cluster_centers_
    centroids_ts_group.append(centroids)
    temp_list = []
    for ind, emb in zip(labels, emb_groups):
        temp_list.append([emb, centroids[ind]])
    emb_and_centroids.append(temp_list)
        
  #emb_and_centroids : [timestamps][number_of_clusters][0: doc_embedding, 1: centroids]  

100%|██████████| 88/88 [00:20<00:00,  4.39it/s]


In [15]:
emb_and_conditions = []
for i in range(len(data_timestamps)-1) :
    emb_groups = [w[3].tolist() for w in data_timestamps[i]]
    temp_list = []
    for em in emb_groups:
        for cond_set in emb_and_centroids[i+1]:
            temp_list.append([em, cond_set])
    emb_and_conditions.append(temp_list)

data_for_diffusion = []
for t_ in emb_and_conditions:
    for c_ in t_:
        data_for_diffusion.append(c_)
# data_for_diffusion : [[[document_embedding][condition1,condition2]], [[document_embedding][condition1,condition2]]]

In [38]:
# make the training dataset smaller
# data_for_diffusion = data_for_diffusion[:10000]


vec_size = len(contextual_doc_embedding[0][1])
new_data_w_condition = torch.randn((len(data_for_diffusion),3*vec_size))
for i in tqdm(range(len(data_for_diffusion))):
    for j in range(vec_size):
        new_data_w_condition[i][j] = data_for_diffusion[i][0][j]
        new_data_w_condition[i][j+vec_size] = data_for_diffusion[i][1][0][j]
        new_data_w_condition[i][j+2*vec_size] = data_for_diffusion[i][1][1][j]
        
new_data_ = torch.randn((len(data_for_diffusion),len(contextual_doc_embedding[0][1])))
for i in tqdm(range(len(data_for_diffusion))):
    for j in range(len(contextual_doc_embedding[0][1])):
        new_data_[i][j] = data_for_diffusion[i][0][j]

100%|██████████| 10000/10000 [00:11<00:00, 859.03it/s]
100%|██████████| 10000/10000 [00:03<00:00, 2847.03it/s]


In [19]:
# pickle.dump(new_data_, open('25_new_data_.p', 'wb'))
# pickle.dump(model_tfidf, open('25_model_tfidf.p', 'wb'))
# pickle.dump(new_data_w_condition, open('25_new_data_w_condition.p', 'wb'))
# new_data_ = pickle.load(open('25_new_data_.p', 'rb'))
# model_tfidf = pickle.load(open('25_model_tfidf.p', 'rb'))
# new_data_w_condition = pickle.load(open('25_new_data_w_condition.p', 'rb'))

In [39]:
###############number of stpes calculated separatly in 02_Diffused_based_storytelling file #############
# # Calculating the parameters for the diffusion model

# # calculate the number of steps to add noises into data to feed them into the diffusion model
# noise_factor_list = [0.15]
# data_final = []
# for noise_factor in noise_factor_list:
#     n_steps = 300
#     data_no_noise = torch.randn((len(contextual_doc_embedding),len(contextual_doc_embedding[0][1])))
#     for i in range(len(contextual_doc_embedding)):
#         for j in range(len(contextual_doc_embedding[0][1])):
#             data_no_noise[i][j] = contextual_doc_embedding[i][1][j]
#     data_final.append(data_no_noise)
# #     data_final.append(data_no_noise[:100][:]) this line has been changed to upper line

# diffused_data_steps_final = []
# for i in range(len(data_final)):
#     vec_size = len(contextual_doc_embedding[0][1])

#     diffused_data_steps = [data_final[i]]
#     for _ in range(n_steps):
#         data_final[i] = data_final[i] + noise_factor * torch.randn_like(data_final[i])
#         diffused_data_steps.append(data_final[i])
#     diffused_data_steps_final.append(diffused_data_steps)

# similarity_noise_final = []
# for m in range(len(data_final)):
#     similarity_noise = {}
#     for i in range(n_steps+1):
#         similarity_noise[i] = []


#     for i in tqdm(range(n_steps+1)):
#         temp = []
#         for j in range(len(diffused_data_steps_final[m][0])):
#             temp.append(cosine_similarity([diffused_data_steps_final[m][0][j].tolist()],[diffused_data_steps_final[m][i][j].tolist()])[0][0])
#         similarity_noise[i].append(temp)
    
#     similarity_noise_final.append(similarity_noise)

# # plot for all different noise factors
# for n in range(len(data_final)):
#     res = [np.mean(similarity_noise_final[n][w]) for w in similarity_noise_final[n].keys() if w%100 == 0]
#     plt.plot([100*i for i in range(len(res))],res, '-o')
# plt.show()

# #plot for the last noise factor
# plt.plot([100*i for i in range(len(res))],res, '-o')
# plt.title('Showing how the data and noise combined in different steps')
# plt.xlabel('number of steps')
# plt.ylabel('cosine similarities')
# plt.show()

# n_step_list = []
# c = 0
# for noise_f in similarity_noise_final: 
#     for key, val in noise_f.items():
#         if np.mean(val) < 0.5:
#             print('for noise factor {} the number of step is {}'.format(noise_factor_list[c],key))
#             n_step_list.append(key)
#             break
#     c += 1
    

# noise_factor = noise_factor_list[0]
# n_steps = n_step_list[0]
n_steps = 50
noise_factor = 0.15

In [40]:
len(data_for_diffusion)

10000

In [41]:
# diffusion models
diffused_data_steps = [new_data_w_condition]
for _ in tqdm(range(n_steps)):
    new_data_w_condition[:,:vec_size] += noise_factor * torch.randn_like(new_data_)
    diffused_data_steps.append(new_data_w_condition)
    


100%|██████████| 50/50 [00:00<00:00, 133.11it/s]


In [23]:
# pickle.dump(diffused_data_steps, open('25_diffused_data_steps.p', 'wb'))
# diffused_data_steps = pickle.load(open('25_diffused_data_steps.p', 'rb'))

In [42]:
# Convert the diffused data steps into a dataset
# Each sample is a pair of subsequent diffused data steps
diffused_data_pairs=[torch.stack((diffused_data_steps[i+1],diffused_data_steps[i]),dim= 1) for i in range(n_steps)]
diffused_data = torch.cat(diffused_data_pairs, dim=0)
dataset_ = TensorDataset(*torch.unbind(diffused_data, dim=1))

In [43]:
#  Create a Dataloader to handle batching of dataset
dataloader = DataLoader(dataset_, batch_size=256, shuffle=True)

# Neural Network
model = nn.Sequential(nn.Linear(3*vec_size, vec_size), nn.ReLU(), nn.Dropout(0.2),
                     nn.Linear(vec_size, vec_size), nn.ReLU(), nn.Dropout(0.2),
                      nn.Linear(vec_size, 2*vec_size), nn.ReLU(), nn.Dropout(0.2),
                     nn.Linear(2*vec_size, 3*vec_size)
                     )

# now move the  model to device
model = model.to(device)

optimizer = optim.Adam(model.parameters(), lr = 0.001)
loss_func = nn.MSELoss()

In [44]:
# Training loop
for i in range(1, 501):
    model.train()

    for batch_noisy, batch_data in dataloader:
        # Move data to device
        batch_noisy, batch_data = batch_noisy.to(device), batch_data.to(device)

        optimizer.zero_grad()
        batch_denoised = model(batch_noisy)
        loss = loss_func(batch_denoised, batch_data)
        loss.backward()
        optimizer.step()

    if i % 10 == 0:
        print(f'Epoch {i}, Loss: {loss.item()}')

Epoch 10, Loss: 6.820949554443359
Epoch 20, Loss: 7.862959861755371
Epoch 30, Loss: 5.326531887054443
Epoch 40, Loss: 7.327765941619873
Epoch 50, Loss: 7.02612829208374
Epoch 60, Loss: 5.420815944671631
Epoch 70, Loss: 7.1498517990112305
Epoch 80, Loss: 5.689389705657959
Epoch 90, Loss: 6.386630535125732
Epoch 100, Loss: 6.168487548828125
Epoch 110, Loss: 6.018319606781006
Epoch 120, Loss: 6.360808849334717
Epoch 130, Loss: 5.305604934692383
Epoch 140, Loss: 6.055332183837891
Epoch 150, Loss: 6.240146636962891
Epoch 160, Loss: 5.417392253875732
Epoch 170, Loss: 5.3054609298706055
Epoch 180, Loss: 7.627220153808594
Epoch 190, Loss: 7.104910850524902
Epoch 200, Loss: 6.9619245529174805
Epoch 210, Loss: 6.322051048278809
Epoch 220, Loss: 5.991157531738281
Epoch 230, Loss: 7.260288238525391
Epoch 240, Loss: 5.267133712768555
Epoch 250, Loss: 5.832584381103516
Epoch 260, Loss: 4.940969467163086
Epoch 270, Loss: 6.420839309692383
Epoch 280, Loss: 5.463011741638184
Epoch 290, Loss: 4.59209823

In [45]:
# pickle.dump(model, open('25_model_vectorsize128_diffusion.p', 'wb'))
# model = pickle.load(open('25_model_vectorsize128_diffusion.p', 'rb'))

In [47]:
# import sys
# def sizeof_fmt(num, suffix='B'):
#     ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified'''
#     for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
#         if abs(num) < 1024.0:
#             return "%3.1f %s%s" % (num, unit, suffix)
#         num /= 1024.0
#     return "%.1f %s%s" % (num, 'Yi', suffix)

# for name, size in sorted(((name, sys.getsizeof(value)) for name, value in list(
#                           locals().items())), key= lambda x: -x[1])[:10]:
#     print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))

In [48]:
collect_ids = {}
for i in range(len(data_timestamps)):
    collect_ids[i] = []
for t in range(len(data_timestamps)):
    for doc_ in data_timestamps[t]:
        collect_ids[t].append(doc_[0])

In [49]:
def cosin_similarity_doc_emb(vec_one, vec_two):
    return cosine_similarity([vec_one], [vec_two])[0][0]

In [50]:
def keyword_extractor(x_tf_idf_, list_of_words_in_tfidf_, top_keywords_):
    temp_dict = dict()
    for i,j in zip(x_tf_idf_, list_of_words_in_tfidf_):
        temp_dict[j] = i
    return sorted(temp_dict.items(), key=lambda x:x[1], reverse=True)[:top_keywords_]

In [51]:
def merge_keywords(list_one, list_two, factor_merging):
    merge_dict = dict()
    for data_ in list_one:
        merge_dict[data_[0]] = factor_merging * data_[1]
    for data_ in list_two:
        if data_[0] in merge_dict:
            merge_dict[data_[0]] = (merge_dict[data_[0]] + data_[1])/2
        else:
            merge_dict[data_[0]] = data_[1]
    return sorted(merge_dict.items(), key=lambda x:x[1], reverse=True)[:len(list_one)]

In [52]:
def keyword_similarity(list_one, list_two):
    score = 0
    
    for data_ in list_two:
        for data in list_one:
            if data[0] == data_[0]:
                score += min(data[1], data_[1])
    return score

In [53]:
def similarity_evaluator_emb_tf_idf(seed_emb, seed_tf_idf, list_of_emb_generated, list_of_tfidf_generated):
    cosine_sim = []
    tf_idf_sim = []
    for gen_emb in list_of_emb_generated:
        cosine_sim.append(cosine_similarity([seed_emb], [gen_emb])[0][0])
    for gen_tfidf in list_of_tfidf_generated:
        tf_idf_sim.append(sum(np.multiply(gen_tfidf, seed_tf_idf)))
    #normalized tf_idf
    temp_max = max(tf_idf_sim)+ 0.0001
    tf_idf_sim = [w/temp_max for w in tf_idf_sim]
    return [n*m for n,m in zip(cosine_sim, tf_idf_sim)]
        
    

In [54]:
def tf_idf_normalizer(tf_idf_vec):
    temp_list = []
    for el in tf_idf_vec:
        if (el > 0) and (el < 1):
            temp_list.append(el)
        elif el > 1:
            temp_list.append(1.0)
        else:
            temp_list.append(0.0)
    return temp_list

In [55]:
def ts_finder(ts_, collect_ids):
    for ke, val in collect_ids.items():
        if ts_ in val:
            return ke
    return -1

In [56]:
def remove_duplicate_embedding(list_of_embedding, max_sim=0.9):
    res_ = list()
    if len(list_of_embedding) == 2:
        return list_of_embedding
    else:
        res_.append(list_of_embedding[0])
        for i in range(1, len(list_of_embedding)):
            max_ = 0
            for j in range(len(res_)):
                cos_sim = cosine_similarity([res_[j]], [list_of_embedding[i]])[0][0]
                max_ = max_ if cos_sim < max_ else cos_sim
            if max_ < max_sim:
                res_.append(list_of_embedding[i])
    return res_

In [57]:
# this function recieves a seed and generats the relevant story to that seed
def story_gen_emb_tfidf_novel_method(seed_, collect_ids, min_size_generated_data_per_ts=50, min_acceptable_similarity= 0.9, min_keyword_overlapping= 0.60):
    ###SEARCH ALGORITHM
    starting_ts = ts_finder(seed_, collect_ids)
        
    seed_doc = seed_
    storyline = []
    storyline.append(seed_doc)
    storyline_keywords = keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100)
    aux_doc = False
    for ts_ids in list(collect_ids.keys())[starting_ts+1:]:
        #preparing conditions and noises for diffusion model
        data_for_generator = []
        data_ids = [w[0] for w in data_timestamps[ts_ids]]
        seed_embedding = contextual_dict[seed_doc]
        temp = list()
        while len(data_for_generator) < min_size_generated_data_per_ts:
            for cent_ in centroids_ts_group[ts_ids]:
                temp[:vec_size] = seed_embedding
                temp[vec_size:] = cent_
                # data_for_generator.append([contextual_dict[seed_doc], cent_])
                data_for_generator.append(temp)
    
        random_doc_tensor = torch.randn((len(data_for_generator), len(data_for_generator[0]))).to(device)
        for i in range(len(data_for_generator)):
            random_doc_tensor[i] = torch.from_numpy(np.array(data_for_generator[i]))
                
        average_data = torch.mean(abs(random_doc_tensor))
        noise_data = torch.randn((len(data_for_generator), vec_size)).to(device)
        average_noise = torch.mean(abs(noise_data))
        noise_coef = average_data/average_noise
        noise_data = noise_coef*noise_data
        

                
        # concatinate the noises and conditions
        generated_data = torch.cat([abs(noise_data), random_doc_tensor.to(device)],-1)
        
        
        # concatinate the noises and conditions
        generated_data = torch.cat([abs(torch.randn((len(data_for_generator), vec_size))).to(device), random_doc_tensor.to(device)],-1)
    
        # run diffusion model to generate new embedding
        for _ in range(int(n_steps)):
            # print(generated_data)
            noise_data = generated_data[:,:vec_size] + noise_factor * torch.randn_like(generated_data[:,:vec_size])
            input_vec = torch.cat([random_doc_tensor, noise_data],-1)
            generated_data = model(input_vec)
    
        res_list = generated_data.cpu().detach().numpy()
        generated_doc_by_diffusion = [res_list[i][:vec_size] for i in range(len(res_list))]
            
        # predict the tf_idf for new generated samples
        X_tfidf = np.random.rand(len(generated_doc_by_diffusion), len(generated_doc_by_diffusion[0]))
        
        for i in range(len(X_tfidf)):
            X_tfidf[i] = generated_doc_by_diffusion[i]
        
        generated_tf_idf_by_nn = model_tfidf.predict(X_tfidf)

        #remove duplicated generated data (the ones are very similar to each other) (maximum acceptable cosine similarity)
        generated_tf_idf_by_nn = remove_duplicate_embedding(generated_tf_idf_by_nn, 0.95)
        
        #normalize the values of tf_idf vector (nothing less than zero and nothing greater than one)
        normalized_generated_tf_idf = [tf_idf_normalizer(w) for w in generated_tf_idf_by_nn]
    
    
        #candidate documents in the ts
        candidate_docs_id = collect_ids[ts_ids]
        #filter candidates based on the keyword overlaps
        candidate_docs_tf_idf = [keyword_extractor(tf_idf_dict[w], list_of_words_in_tfidf, 100) for w in candidate_docs_id]
        candidate_scores = [keyword_similarity(w,storyline_keywords) for w in candidate_docs_tf_idf]
        candidates = [candidate_docs_id[i] for i in range(len(candidate_docs_id)) if candidate_scores[i] > min_keyword_overlapping]
    
        if len(candidates) < 1:
            #there is no document in the timestamp with the minimum acceptable keyword overlap
            continue
        # calculate the similarity between generated emb and seed document
        generated_sim_with_seed = [cosin_similarity_doc_emb(w, seed_embedding) for w in generated_doc_by_diffusion]
    
        #pick the best generated document
        picked_generated_emb = generated_doc_by_diffusion[generated_sim_with_seed.index(max(generated_sim_with_seed))]
        # pick the most similar candidate to the selected generated document
        candidate_vs_generated_cosine_similarity = [cosin_similarity_doc_emb(picked_generated_emb, contextual_dict[w]) for w in candidates]
        max_candidate_sim = max(candidate_vs_generated_cosine_similarity)
        
        if max_candidate_sim < min_acceptable_similarity:
            # the best candidate is still not similar to the previous document. going to the next ts
            continue
        seed_doc = candidates[candidate_vs_generated_cosine_similarity.index(max_candidate_sim)]
        # add the new pick into the soryline
        storyline.append(seed_doc)
        # update the storyline_keywords
        merge_keywords(storyline_keywords, keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100), 0.9)
    return storyline


In [58]:
zx = story_gen_emb_tfidf_novel_method(3594, collect_ids) 
print(*[[w, title_id_dict[w]] for w in zx], sep='\n')

[3594, 'Biden Administration Plans to Offer Second Booster Shots to Those 50 and Up']
[3873, 'Coronavirus deaths in the U.S. fall to their lowest point since the summer.']
[7378, 'With cases rising, Mayor Eric Adams is keeping New York City’s preschool mask mandate.']
[8367, 'The BA.2 subvariant prompts a slight increase in new U.S. cases.']
[9608, 'Biden says Americans should decide for themselves if they want to wear masks on public transportation.']
[10651, 'The European Union says the emergency phase of the pandemic is over.']
[11054, 'F.D.A. sets aside June for potential decisions on vaccines for the youngest Americans.']
[20536, 'As known cases climb in southern Africa, testing on the continent has fallen off.']
[21862, 'Pfizer Says 3 Vaccine Doses Produce Strong Response in Young Children']
[22382, 'Jay Inslee, governor of Washington, tests positive for the virus.']
[27066, 'Youngest Children Could Get Covid Shots in Late June, White House Says']
[27731, 'Aid Stalled, White Hous

In [59]:
# this function recieves a seed and generats the relevant story to that seed
def story_gen_just_emb_novel_method(seed_, collect_ids, min_size_generated_data_per_ts=50, min_acceptable_similarity= 0.9, min_keyword_overlapping= 0.60):
    ###SEARCH ALGORITHM
    starting_ts = ts_finder(seed_, collect_ids)
        
    seed_doc = seed_
    storyline = []
    storyline.append(seed_doc)
    # storyline_keywords = keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100)
    # aux_doc = False
    for ts_ids in list(collect_ids.keys())[starting_ts+1:]:
        #preparing conditions and noises for diffusion model
        data_for_generator = []
        data_ids = [w[0] for w in data_timestamps[ts_ids]]
        seed_embedding = contextual_dict[seed_doc]
        temp = list()
        while len(data_for_generator) < min_size_generated_data_per_ts:
            for cent_ in centroids_ts_group[ts_ids]:
                temp[:vec_size] = seed_embedding
                temp[vec_size:] = cent_
                # data_for_generator.append([contextual_dict[seed_doc], cent_])
                data_for_generator.append(temp)
    
        random_doc_tensor = torch.randn((len(data_for_generator), len(data_for_generator[0]))).to(device)
        for i in range(len(data_for_generator)):
            random_doc_tensor[i] = torch.from_numpy(np.array(data_for_generator[i]))
                
        average_data = torch.mean(abs(random_doc_tensor))
        noise_data = torch.randn((len(data_for_generator), vec_size)).to(device)
        average_noise = torch.mean(abs(noise_data))
        noise_coef = average_data/average_noise
        noise_data = noise_coef*noise_data
        

                
        # concatinate the noises and conditions
        generated_data = torch.cat([abs(noise_data), random_doc_tensor.to(device)],-1)
        
        
        # concatinate the noises and conditions
        generated_data = torch.cat([abs(torch.randn((len(data_for_generator), vec_size))).to(device), random_doc_tensor.to(device)],-1)
    
        # run diffusion model to generate new embedding
        for _ in range(int(n_steps)):
            # print(generated_data)
            noise_data = generated_data[:,:vec_size] + noise_factor * torch.randn_like(generated_data[:,:vec_size])
            input_vec = torch.cat([random_doc_tensor, noise_data],-1)
            generated_data = model(input_vec)
    
        res_list = generated_data.cpu().detach().numpy()
        generated_doc_by_diffusion = [res_list[i][:vec_size] for i in range(len(res_list))]    
    
        #candidate documents in the ts
        candidate_docs_id = collect_ids[ts_ids]
        #filter candidates based on the keyword overlaps
        # candidate_docs_tf_idf = [keyword_extractor(tf_idf_dict[w], list_of_words_in_tfidf, 100) for w in candidate_docs_id]
        # candidate_scores = [keyword_similarity(w,storyline_keywords) for w in candidate_docs_tf_idf]
        candidates = [candidate_docs_id[i] for i in range(len(candidate_docs_id))]
        # if len(candidates) < 1:
        #     #there is no document in the timestamp with the minimum acceptable keyword overlap
        #     continue
        # calculate the similarity between generated emb and seed document
        generated_sim_with_seed = [cosin_similarity_doc_emb(w, seed_embedding) for w in generated_doc_by_diffusion]
        #pick the best generated document
        picked_generated_emb = generated_doc_by_diffusion[generated_sim_with_seed.index(max(generated_sim_with_seed))]

        # pick the most similar candidate to the selected generated document
        candidate_vs_generated_cosine_similarity = [cosin_similarity_doc_emb(picked_generated_emb, contextual_dict[w]) for w in candidates]
        max_candidate_sim = max(candidate_vs_generated_cosine_similarity)
        
        if max_candidate_sim < min_acceptable_similarity:
            # the best candidate is still not similar to the previous document. going to the next ts
            continue
        seed_doc = candidates[candidate_vs_generated_cosine_similarity.index(max_candidate_sim)]
        # add the new pick into the soryline
        storyline.append(seed_doc)
        # update the storyline_keywords
        # merge_keywords(storyline_keywords, keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100), 0.9)
    return storyline


In [60]:
zx_1 = story_gen_just_emb_novel_method(3594, collect_ids, min_acceptable_similarity= 0.94) 
print(*[[w, title_id_dict[w]] for w in zx_1], sep='\n')

[3594, 'Biden Administration Plans to Offer Second Booster Shots to Those 50 and Up']
[9219, '‘Thank You for Not Killing Us’']
[9413, 'Russian attack on Lviv shakes sense of security in a relatively safe city.']
[10291, 'Covid Outbreak in Beijing Prompts Order for Nearly Citywide Testing']
[10777, 'In El Salvador, the President Cracks Down on Civil Liberties, and Is Beloved for It']
[19731, 'Worries about the strain on health care grow in the U.S. as hot spots spread and hospitalizations rise.']
[21147, 'With Plunging Enrollment, a ‘Seismic Hit’ to Public Schools']
[27692, 'Moderna says new trial results show that a revised vaccine works better against Omicron.']
[30439, 'F.D.A. May Move Toward Updating Vaccines']


In [61]:
# this function recieves a seed and generats the relevant story to that seed
def story_gen_just_tfidf_novel_method(seed_, collect_ids, min_size_generated_data_per_ts=50, min_acceptable_similarity= 0.9, min_keyword_overlapping= 0.60):
    ###SEARCH ALGORITHM
    starting_ts = ts_finder(seed_, collect_ids)
        
    seed_doc = seed_
    storyline = []
    storyline.append(seed_doc)
    storyline_keywords = keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100)
    aux_doc = False
    for ts_ids in list(collect_ids.keys())[starting_ts+1:]:
        #candidate documents in the ts
        candidate_docs_id = collect_ids[ts_ids]
        #filter candidates based on the keyword overlaps
        candidate_docs_tf_idf = [keyword_extractor(tf_idf_dict[w], list_of_words_in_tfidf, 100) for w in candidate_docs_id]
        candidate_scores = [keyword_similarity(w,storyline_keywords) for w in candidate_docs_tf_idf]
        temp = dict()
        for i in range(len(candidate_docs_id)):
            temp[candidate_scores[i]] = candidate_docs_id[i]
        max_val = max(list(temp.keys()))
        if max_val > min_keyword_overlapping:
            seed_doc = temp[max_val]
            # add the new pick into the soryline
            storyline.append(seed_doc)
            # update the storyline_keywords
            merge_keywords(storyline_keywords, keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100), 0.9)
        else:
            continue

    return storyline


In [62]:
zx_2 = story_gen_just_tfidf_novel_method(3594, collect_ids, min_acceptable_similarity= 0.94, min_keyword_overlapping= 0.6) 
print(*[[w, title_id_dict[w]] for w in zx_2], sep='\n')

[3594, 'Biden Administration Plans to Offer Second Booster Shots to Those 50 and Up']
[4120, 'Biden gets his second booster after his Covid remarks.']
[7434, 'President Biden asks for more funding and Hong Kong’s death toll strains mortuaries: The week in Covid news.']
[7829, 'The C.D.C. director says she ‘really would encourage’ second boosters for older people and many with chronic conditions.']
[8009, 'F.D.A. Panel Explores Challenges of Revamping Coronavirus Vaccines']
[8584, 'Virus outbreaks are pushing some U.S. universities to reinstate mask mandates.']
[8965, 'Pfizer Says Booster Strengthens Immune Response for Children 5 to 11']
[9361, 'Americans Over 60 Should Get Second Booster, Official Says']
[9953, 'The U.S. is extending a vaccine rule for international travelers at its land borders.']
[10512, 'Vaccines for young children are being delayed by incomplete data, a top F.D.A. official suggests.']
[11054, 'F.D.A. sets aside June for potential decisions on vaccines for the youn

In [63]:
def distributed_similarity(storyline_, storyline_keywords_, candidate_doc_, diffused_doc_, tf_idf_dict_=tf_idf_dict, contextual_dict_=contextual_dict, weight_for_past_doc=0.9):
    """
    This function receives a storyline_ (a list of already picked stories), a storyline_keywords_(a sorted list of most important keywords in the story)
    a diffused_doc_ (a generated embedding by diffusion as a hypothesis candidate), candidate_doc_ as a document that we want to pick as the next one,  and weight_for_past_doc as a weight to control the contribution of past
    """
    weights_list = []
    for doc_ in storyline_:
        key_word_sim = keyword_extractor(tf_idf_dict_[doc_], list_of_words_in_tfidf, 100)
        weights_list.append(keyword_similarity(key_word_sim, storyline_keywords_))
    #normalize weights
    min_ = min(weights_list)
    max_ = max(weights_list)
    normalized_weights = [(w - min_)/(abs(max_ - min_) + 0.0001) for w in weights_list]
    sim_ = cosine_similarity([contextual_dict_[candidate_doc_]],[diffused_doc_])[0][0]
    w_f_p_d = [weight_for_past_doc**(t+1) for t in reversed(range(len(storyline_)))]
    for ind in range(len(storyline_)):
        sim_ += w_f_p_d[ind]*weights_list[ind]*cosine_similarity([contextual_dict_[storyline_[ind]]], [diffused_doc_])[0][0]
    #normalize cosine similarities
    sim_ = sim_/(sum(w_f_p_d)+1)
    return sim_

In [142]:
def distributed_similarity_(storyline_, storyline_keywords_, candidate_doc_, diffused_doc_, tf_idf_dict_=tf_idf_dict, contextual_dict_=contextual_dict, weight_for_past_doc=0.9):
    """
    This function receives a storyline_ (a list of already picked stories), a storyline_keywords_(a sorted list of most important keywords in the story)
    a diffused_doc_ (a generated embedding by diffusion as a hypothesis candidate), candidate_doc_ as a document that we want to pick as the next one,  and weight_for_past_doc as a weight to control the contribution of past
    """
    weights_list = []
    for doc_ in storyline_:
        key_word_sim = keyword_extractor(tf_idf_dict_[doc_], list_of_words_in_tfidf, 100)
        weights_list.append(keyword_similarity(key_word_sim, storyline_keywords_))
    #normalize weights
    min_ = min(weights_list)
    max_ = max(weights_list)
    normalized_weights = [(w - min_)/(abs(max_ - min_) + 0.0001) for w in weights_list]
    sim_ = cosine_similarity([contextual_dict_[candidate_doc_]],[diffused_doc_])[0][0]
    w_f_p_d = [weight_for_past_doc**(t+1) for t in reversed(range(len(storyline_)))]
   
    for ind in range(len(storyline_)):
        sim_ += w_f_p_d[ind]*weights_list[ind]*cosine_similarity([contextual_dict_[storyline_[ind]]], [diffused_doc_])[0][0]
    #normalize cosine similarities
    sim_ = sim_/(sum(w_f_p_d)+1+sum(weights_list))
    return sim_/weight_for_past_doc

In [64]:
# this function recieves a seed and generats the relevant story to that seed
def story_gen_emb_tfidf_novel_method_w_distributed_attention(seed_, collect_ids, min_size_generated_data_per_ts=50, min_acceptable_similarity= 0.9, min_keyword_overlapping= 0.60):
    ###SEARCH ALGORITHM
    starting_ts = ts_finder(seed_, collect_ids)
        
    seed_doc = seed_
    storyline = []
    storyline.append(seed_doc)
    storyline_keywords = keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100)
    
    for ts_ids in list(collect_ids.keys())[starting_ts+1:]:
        #preparing conditions and noises for diffusion model
        data_for_generator = []
        data_ids = [w[0] for w in data_timestamps[ts_ids]]
        seed_embedding = contextual_dict[seed_doc]
        temp = list()
        while len(data_for_generator) < min_size_generated_data_per_ts:
            for cent_ in centroids_ts_group[ts_ids]:
                temp[:vec_size] = seed_embedding
                temp[vec_size:] = cent_
                # data_for_generator.append([contextual_dict[seed_doc], cent_])
                data_for_generator.append(temp)
    
        random_doc_tensor = torch.randn((len(data_for_generator), len(data_for_generator[0]))).to(device)
        for i in range(len(data_for_generator)):
            random_doc_tensor[i] = torch.from_numpy(np.array(data_for_generator[i]))
                
        average_data = torch.mean(abs(random_doc_tensor))
        noise_data = torch.randn((len(data_for_generator), vec_size)).to(device)
        average_noise = torch.mean(abs(noise_data))
        noise_coef = average_data/average_noise
        noise_data = noise_coef*noise_data
        

                
        # concatinate the noises and conditions
        generated_data = torch.cat([abs(noise_data), random_doc_tensor.to(device)],-1)
        
        
        # concatinate the noises and conditions
        generated_data = torch.cat([abs(torch.randn((len(data_for_generator), vec_size))).to(device), random_doc_tensor.to(device)],-1)
    
        # run diffusion model to generate new embedding
        for _ in range(int(n_steps)):
            # print(generated_data)
            noise_data = generated_data[:,:vec_size] + noise_factor * torch.randn_like(generated_data[:,:vec_size])
            input_vec = torch.cat([random_doc_tensor, noise_data],-1)
            generated_data = model(input_vec)
    
        res_list = generated_data.cpu().detach().numpy()
        generated_doc_by_diffusion = [res_list[i][:vec_size] for i in range(len(res_list))]
            
        # predict the tf_idf for new generated samples
        X_tfidf = np.random.rand(len(generated_doc_by_diffusion), len(generated_doc_by_diffusion[0]))
        
        for i in range(len(X_tfidf)):
            X_tfidf[i] = generated_doc_by_diffusion[i]
        
        generated_tf_idf_by_nn = model_tfidf.predict(X_tfidf)

        #remove duplicated generated data (the ones are very similar to each other) (maximum acceptable cosine similarity)
        generated_tf_idf_by_nn = remove_duplicate_embedding(generated_tf_idf_by_nn, 0.95)
        
        #normalize the values of tf_idf vector (nothing less than zero and nothing greater than one)
        normalized_generated_tf_idf = [tf_idf_normalizer(w) for w in generated_tf_idf_by_nn]
    
    
        #candidate documents in the ts
        candidate_docs_id = collect_ids[ts_ids]
        #filter candidates based on the keyword overlaps
        candidate_docs_tf_idf = [keyword_extractor(tf_idf_dict[w], list_of_words_in_tfidf, 100) for w in candidate_docs_id]
        candidate_scores = [keyword_similarity(w,storyline_keywords) for w in candidate_docs_tf_idf]
        candidates = [candidate_docs_id[i] for i in range(len(candidate_docs_id)) if candidate_scores[i] > min_keyword_overlapping]
    
        if len(candidates) < 1:
            #there is no document in the timestamp with the minimum acceptable keyword overlap
            continue
        # calculate the similarity between generated emb and seed document
        generated_sim_with_seed = [cosin_similarity_doc_emb(w, seed_embedding) for w in generated_doc_by_diffusion]
    
        #pick the best generated document
        picked_generated_emb = generated_doc_by_diffusion[generated_sim_with_seed.index(max(generated_sim_with_seed))]
        # pick the most similar candidate to the selected generated document

        #using distributed attention similarity
        candidate_vs_generated_cosine_similarity = [distributed_similarity(storyline, storyline_keywords, w, picked_generated_emb) for w in candidates]


        
        # candidate_vs_generated_cosine_similarity = [cosin_similarity_doc_emb(picked_generated_emb, contextual_dict[w]) for w in candidates]
        max_candidate_sim = max(candidate_vs_generated_cosine_similarity)
        
        if max_candidate_sim < min_acceptable_similarity:
            # the best candidate is still not similar to the previous document. going to the next ts
            continue
        seed_doc = candidates[candidate_vs_generated_cosine_similarity.index(max_candidate_sim)]
        # add the new pick into the soryline
        storyline.append(seed_doc)
        # update the storyline_keywords
        merge_keywords(storyline_keywords, keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100), 0.9)
    return storyline


In [65]:
zx_3 = story_gen_emb_tfidf_novel_method_w_distributed_attention(3594, collect_ids, min_acceptable_similarity= 0.94, min_keyword_overlapping= 0.6) 
print(*[[w, title_id_dict[w]] for w in zx_3], sep='\n')

[3594, 'Biden Administration Plans to Offer Second Booster Shots to Those 50 and Up']
[3873, 'Coronavirus deaths in the U.S. fall to their lowest point since the summer.']
[7378, 'With cases rising, Mayor Eric Adams is keeping New York City’s preschool mask mandate.']
[7976, 'European health agencies say it is ‘too early’ for second boosters for most people.']
[8367, 'The BA.2 subvariant prompts a slight increase in new U.S. cases.']
[8584, 'Virus outbreaks are pushing some U.S. universities to reinstate mask mandates.']
[8965, 'Pfizer Says Booster Strengthens Immune Response for Children 5 to 11']
[9361, 'Americans Over 60 Should Get Second Booster, Official Says']
[9608, 'Biden says Americans should decide for themselves if they want to wear masks on public transportation.']
[10651, 'The European Union says the emergency phase of the pandemic is over.']
[11054, 'F.D.A. sets aside June for potential decisions on vaccines for the youngest Americans.']
[19031, 'Inside China’s Zero-Covid

In [83]:
all_ids = []
for i in collect_ids:
    for j in collect_ids[i]:
        all_ids.append(j)

In [84]:
seen_doc = set()
result_dict_story_gen_emb_tfidf_novel_method_w_distributed_attention = dict()
for se in tqdm(all_ids):
    if se in seen_doc:
        continue
    print(se)
    res = story_gen_emb_tfidf_novel_method_w_distributed_attention(se, collect_ids, min_acceptable_similarity= 0.94, min_keyword_overlapping= 0.6) 
    result_dict_story_gen_emb_tfidf_novel_method_w_distributed_attention[se] = res
    for d in res:
        seen_doc.add(d)

  0%|          | 0/8322 [00:00<?, ?it/s]

22989


  0%|          | 1/8322 [03:05<429:55:02, 186.00s/it]

23079


  0%|          | 2/8322 [06:40<469:03:04, 202.95s/it]

23002


  0%|          | 3/8322 [11:05<534:23:29, 231.25s/it]

23124


  0%|          | 4/8322 [14:02<484:27:10, 209.67s/it]

23131


  0%|          | 5/8322 [18:00<508:27:28, 220.09s/it]

23119


  0%|          | 6/8322 [21:19<491:19:43, 212.70s/it]

23192


  0%|          | 7/8322 [24:52<491:50:24, 212.94s/it]

23066


  0%|          | 8/8322 [29:05<521:06:21, 225.64s/it]

22988


  0%|          | 9/8322 [32:08<490:01:16, 212.21s/it]

23159


  0%|          | 10/8322 [36:29<524:59:53, 227.38s/it]

23107


  0%|          | 11/8322 [40:38<539:58:05, 233.89s/it]

23022


  0%|          | 12/8322 [43:40<503:59:10, 218.33s/it]

23045


  0%|          | 13/8322 [46:06<452:46:57, 196.17s/it]

23110


  0%|          | 14/8322 [50:01<480:17:28, 208.12s/it]

23084


  0%|          | 15/8322 [52:11<425:47:19, 184.52s/it]

23017


  0%|          | 16/8322 [55:35<439:05:32, 190.31s/it]

23036


  0%|          | 17/8322 [58:26<425:34:03, 184.47s/it]

23023


  0%|          | 18/8322 [1:01:33<427:16:26, 185.23s/it]

23005


  0%|          | 19/8322 [1:05:48<475:40:14, 206.24s/it]

23123


  0%|          | 20/8322 [1:08:35<448:09:47, 194.34s/it]

23061


  0%|          | 21/8322 [1:11:28<433:48:07, 188.13s/it]

23121


  0%|          | 22/8322 [1:14:24<425:01:04, 184.35s/it]

23073


  0%|          | 23/8322 [1:18:04<449:50:28, 195.14s/it]

23020


  0%|          | 24/8322 [1:21:18<449:04:10, 194.82s/it]

23163


  0%|          | 25/8322 [1:24:21<440:39:28, 191.20s/it]

23177


  0%|          | 26/8322 [1:30:49<576:39:15, 250.24s/it]

23077


  0%|          | 27/8322 [1:35:14<586:45:21, 254.65s/it]

23130


  0%|          | 28/8322 [1:38:25<542:31:44, 235.48s/it]

23188


  0%|          | 29/8322 [1:42:18<540:53:23, 234.80s/it]

23054


  0%|          | 30/8322 [1:45:39<517:53:42, 224.85s/it]

22990


  0%|          | 31/8322 [1:48:52<495:45:45, 215.26s/it]

23189


  0%|          | 32/8322 [1:53:04<521:04:07, 226.28s/it]

23115


  0%|          | 33/8322 [1:56:48<518:54:58, 225.37s/it]

23040


  0%|          | 34/8322 [2:00:48<529:26:06, 229.97s/it]

23013


  0%|          | 35/8322 [2:04:07<507:34:44, 220.50s/it]

23185


  0%|          | 36/8322 [2:07:11<482:39:43, 209.70s/it]

23126


  0%|          | 37/8322 [2:10:49<488:00:07, 212.05s/it]

22997


  0%|          | 38/8322 [2:13:55<469:51:42, 204.19s/it]

23060


  0%|          | 39/8322 [2:16:39<442:32:38, 192.34s/it]

23041


  0%|          | 40/8322 [2:19:05<410:26:22, 178.41s/it]

22986


  0%|          | 41/8322 [2:22:24<424:49:03, 184.68s/it]

23004


  1%|          | 42/8322 [2:25:29<424:24:43, 184.53s/it]

23137


  1%|          | 43/8322 [2:28:59<442:26:53, 192.39s/it]

23057


  1%|          | 44/8322 [2:31:35<417:03:14, 181.37s/it]

23136


  1%|          | 45/8322 [2:34:52<427:26:42, 185.91s/it]

23068


  1%|          | 46/8322 [2:36:51<381:14:54, 165.84s/it]

23064


  1%|          | 47/8322 [2:39:59<396:27:21, 172.48s/it]

23146


  1%|          | 48/8322 [2:44:16<455:07:08, 198.02s/it]

23166


  1%|          | 49/8322 [2:46:52<425:57:02, 185.35s/it]

23055


  1%|          | 50/8322 [2:50:39<454:49:45, 197.94s/it]

23026


  1%|          | 51/8322 [2:53:24<431:54:51, 187.99s/it]

23018


  1%|          | 52/8322 [2:57:11<459:01:14, 199.82s/it]

22996


  1%|          | 53/8322 [3:00:19<450:38:39, 196.19s/it]

22998


  1%|          | 54/8322 [3:03:10<433:00:26, 188.54s/it]

23006


  1%|          | 55/8322 [3:05:47<411:29:27, 179.19s/it]

23167


  1%|          | 56/8322 [3:10:01<462:28:08, 201.41s/it]

22983


  1%|          | 57/8322 [3:13:10<454:13:06, 197.84s/it]

23080


  1%|          | 58/8322 [3:16:17<446:55:55, 194.69s/it]

23031


  1%|          | 59/8322 [3:19:02<426:04:51, 185.63s/it]

23035


  1%|          | 60/8322 [3:22:06<424:43:37, 185.07s/it]

22994


  1%|          | 61/8322 [3:25:15<427:55:15, 186.48s/it]

23147


  1%|          | 62/8322 [3:28:12<421:01:20, 183.50s/it]

23122


  1%|          | 63/8322 [3:31:05<413:54:00, 180.41s/it]

23104


  1%|          | 64/8322 [3:33:53<405:20:36, 176.71s/it]

23525


  1%|          | 65/8322 [3:37:39<439:24:38, 191.58s/it]

23565


  1%|          | 66/8322 [3:40:24<420:53:49, 183.53s/it]

23489


  1%|          | 67/8322 [3:43:20<415:30:01, 181.20s/it]

23312


  1%|          | 68/8322 [3:46:09<406:54:56, 177.48s/it]

23400


  1%|          | 69/8322 [3:50:27<462:36:50, 201.79s/it]

23344


  1%|          | 70/8322 [3:54:50<504:26:29, 220.07s/it]

23461


  1%|          | 72/8322 [3:58:39<392:31:23, 171.28s/it]

23331


  1%|          | 73/8322 [4:03:27<458:56:17, 200.29s/it]

23197


  1%|          | 74/8322 [4:06:45<457:40:13, 199.76s/it]

23298


  1%|          | 75/8322 [4:09:15<426:34:53, 186.21s/it]

23551


  1%|          | 76/8322 [4:12:52<446:09:50, 194.78s/it]

23510


  1%|          | 79/8322 [4:16:22<290:12:01, 126.74s/it]

23481


  1%|          | 81/8322 [4:22:54<344:13:48, 150.37s/it]

23548


  1%|          | 82/8322 [4:26:20<369:17:14, 161.34s/it]

23466


  1%|          | 83/8322 [4:29:52<394:36:22, 172.42s/it]

23474


  1%|          | 84/8322 [4:32:59<402:27:01, 175.87s/it]

23553


  1%|          | 85/8322 [4:36:40<428:30:44, 187.28s/it]

23431


  1%|          | 86/8322 [4:40:14<444:42:02, 194.38s/it]

23359


  1%|          | 88/8322 [4:43:57<362:37:07, 158.54s/it]

23206


  1%|          | 89/8322 [4:47:14<383:37:17, 167.74s/it]

23201


  1%|          | 90/8322 [4:50:47<409:26:39, 179.06s/it]

23395


  1%|          | 92/8322 [4:53:55<327:54:54, 143.44s/it]

23291


  1%|          | 93/8322 [4:56:43<341:03:52, 149.21s/it]

23569


  1%|          | 96/8322 [5:00:15<251:36:24, 110.11s/it]

23624


  1%|          | 98/8322 [5:04:11<257:27:03, 112.70s/it]

23511


  1%|          | 99/8322 [5:07:22<290:40:50, 127.26s/it]

23296


  1%|          | 100/8322 [5:11:53<359:51:15, 157.56s/it]

23397


  1%|          | 102/8322 [5:14:40<296:28:24, 129.84s/it]

23248


  1%|          | 104/8322 [5:18:53<293:45:01, 128.68s/it]

23570


  1%|▏         | 105/8322 [5:21:36<309:11:12, 135.46s/it]

23442


  1%|▏         | 107/8322 [5:24:56<279:42:56, 122.58s/it]

23460


  1%|▏         | 109/8322 [5:27:23<241:25:32, 105.82s/it]

23193


  1%|▏         | 111/8322 [5:30:50<239:34:36, 105.04s/it]

23608


  1%|▏         | 114/8322 [5:34:00<200:20:41, 87.87s/it] 

23199


  1%|▏         | 115/8322 [5:36:48<230:12:07, 100.98s/it]

23550


  1%|▏         | 117/8322 [5:40:24<235:07:04, 103.16s/it]

23417


  1%|▏         | 118/8322 [5:43:25<268:09:06, 117.67s/it]

23266


  1%|▏         | 119/8322 [5:46:46<307:57:23, 135.15s/it]

23480


  1%|▏         | 120/8322 [5:49:34<325:27:18, 142.85s/it]

23576


  1%|▏         | 121/8322 [5:53:55<391:47:14, 171.98s/it]

23613


  1%|▏         | 122/8322 [5:57:50<429:25:04, 188.52s/it]

23581


  1%|▏         | 123/8322 [6:01:11<437:00:53, 191.88s/it]

23313


  1%|▏         | 124/8322 [6:04:17<432:58:29, 190.13s/it]

23618


  2%|▏         | 125/8322 [6:08:21<468:21:51, 205.70s/it]

23428


  2%|▏         | 127/8322 [6:14:11<436:54:44, 191.93s/it]

23383


  2%|▏         | 129/8322 [6:16:53<337:56:08, 148.49s/it]

23458


  2%|▏         | 131/8322 [6:20:52<314:12:10, 138.09s/it]

23401


  2%|▏         | 132/8322 [6:23:39<327:36:33, 144.00s/it]

23216


  2%|▏         | 133/8322 [6:27:07<360:13:41, 158.36s/it]

23468


  2%|▏         | 134/8322 [6:30:41<391:02:57, 171.93s/it]

23260


  2%|▏         | 135/8322 [6:35:10<448:00:09, 197.00s/it]

23443


  2%|▏         | 136/8322 [6:38:09<436:47:07, 192.09s/it]

23541


  2%|▏         | 137/8322 [6:41:19<435:50:48, 191.70s/it]

23282


  2%|▏         | 138/8322 [6:45:25<470:55:30, 207.15s/it]

23487


  2%|▏         | 140/8322 [6:48:38<357:59:13, 157.51s/it]

23247


  2%|▏         | 141/8322 [6:51:43<372:55:25, 164.10s/it]

23252


  2%|▏         | 142/8322 [6:55:01<392:29:51, 172.74s/it]

23445


  2%|▏         | 143/8322 [6:57:43<386:19:54, 170.04s/it]

23337


  2%|▏         | 144/8322 [7:00:49<396:27:21, 174.52s/it]

23210


  2%|▏         | 145/8322 [7:04:25<422:35:55, 186.05s/it]

23603


  2%|▏         | 146/8322 [7:07:44<431:21:38, 189.93s/it]

23749


  2%|▏         | 149/8322 [7:11:00<274:55:08, 121.09s/it]

23723


  2%|▏         | 150/8322 [7:13:57<301:08:15, 132.66s/it]

23873


  2%|▏         | 151/8322 [7:16:59<326:58:24, 144.06s/it]

23776


  2%|▏         | 153/8322 [7:19:59<278:25:42, 122.70s/it]

23874


  2%|▏         | 154/8322 [7:23:41<327:38:20, 144.41s/it]

23664


  2%|▏         | 156/8322 [7:26:52<284:45:45, 125.54s/it]

23899


  2%|▏         | 158/8322 [7:29:56<257:36:59, 113.60s/it]

23887


  2%|▏         | 160/8322 [7:32:54<239:00:58, 105.42s/it]

23837


  2%|▏         | 162/8322 [7:37:16<257:41:26, 113.69s/it]

23854


  2%|▏         | 163/8322 [7:43:42<373:46:44, 164.92s/it]

23756


  2%|▏         | 165/8322 [7:46:33<310:46:45, 137.16s/it]

23810


  2%|▏         | 167/8322 [7:50:07<288:02:44, 127.16s/it]

23688


  2%|▏         | 168/8322 [7:53:06<310:26:50, 137.06s/it]

23902


  2%|▏         | 170/8322 [7:56:09<274:00:02, 121.00s/it]

23845


  2%|▏         | 171/8322 [7:59:15<303:23:44, 134.00s/it]

23863


  2%|▏         | 174/8322 [8:02:36<232:50:14, 102.87s/it]

23721


  2%|▏         | 175/8322 [8:06:47<293:29:52, 129.69s/it]

23859


  2%|▏         | 177/8322 [8:09:51<264:29:42, 116.90s/it]

23841


  2%|▏         | 178/8322 [8:13:24<307:10:01, 135.78s/it]

23625


  2%|▏         | 179/8322 [8:16:58<345:53:02, 152.91s/it]

23833


  2%|▏         | 181/8322 [8:21:22<327:14:09, 144.71s/it]

23862


  2%|▏         | 183/8322 [8:24:32<287:24:25, 127.12s/it]

23634


  2%|▏         | 185/8322 [8:27:29<258:02:05, 114.16s/it]

23827


  2%|▏         | 187/8322 [8:30:27<239:30:44, 105.99s/it]

23631


  2%|▏         | 189/8322 [8:33:19<225:03:03, 99.62s/it] 

23916


  2%|▏         | 190/8322 [8:36:30<263:07:02, 116.48s/it]

23895


  2%|▏         | 192/8322 [8:39:09<234:26:02, 103.81s/it]

23733


  2%|▏         | 194/8322 [8:42:44<237:02:58, 104.99s/it]

23628


  2%|▏         | 195/8322 [8:46:29<288:38:48, 127.86s/it]

23788


  2%|▏         | 198/8322 [8:48:53<207:25:36, 91.92s/it] 

23901


  2%|▏         | 199/8322 [8:51:51<241:36:21, 107.08s/it]

23638


  2%|▏         | 200/8322 [8:54:48<273:26:33, 121.20s/it]

23826


  2%|▏         | 201/8322 [8:57:27<292:47:35, 129.79s/it]

23643


  2%|▏         | 203/8322 [9:00:45<265:23:52, 117.68s/it]

23705


  2%|▏         | 205/8322 [9:03:43<242:21:11, 107.49s/it]

23875


  2%|▏         | 208/8322 [9:07:47<216:41:45, 96.14s/it] 

23831


  3%|▎         | 213/8322 [9:10:55<149:34:06, 66.40s/it]

23809


  3%|▎         | 220/8322 [9:15:01<113:58:29, 50.64s/it]

23871


  3%|▎         | 221/8322 [9:18:33<147:47:17, 65.68s/it]

23828


  3%|▎         | 222/8322 [9:21:16<173:34:54, 77.15s/it]

23794


  3%|▎         | 223/8322 [9:24:18<207:23:16, 92.18s/it]

23777


  3%|▎         | 224/8322 [9:27:22<242:45:04, 107.92s/it]

23735


  3%|▎         | 225/8322 [9:31:20<299:56:01, 133.35s/it]

23896


  3%|▎         | 226/8322 [9:34:57<340:39:25, 151.48s/it]

23714


  3%|▎         | 227/8322 [9:38:46<382:19:23, 170.03s/it]

23829


  3%|▎         | 228/8322 [9:41:55<393:20:34, 174.95s/it]

24089


  3%|▎         | 231/8322 [9:45:10<264:08:06, 117.52s/it]

24025


  3%|▎         | 232/8322 [9:49:00<314:41:46, 140.04s/it]

24075


  3%|▎         | 233/8322 [9:51:35<321:51:28, 143.24s/it]

23941


  3%|▎         | 234/8322 [9:55:25<368:51:50, 164.18s/it]

24125


  3%|▎         | 235/8322 [10:00:03<434:02:39, 193.22s/it]

24061


  3%|▎         | 236/8322 [10:02:51<419:00:57, 186.55s/it]

24193


  3%|▎         | 237/8322 [10:06:50<451:12:08, 200.91s/it]

24026


  3%|▎         | 239/8322 [10:09:47<339:55:03, 151.39s/it]

24325


  3%|▎         | 245/8322 [10:13:58<178:53:09, 79.73s/it] 

24303


  3%|▎         | 248/8322 [10:17:23<170:29:10, 76.02s/it]

24239


  3%|▎         | 249/8322 [10:21:10<215:16:43, 96.00s/it]

24148


  3%|▎         | 252/8322 [10:24:06<184:48:31, 82.44s/it]

24240


  3%|▎         | 253/8322 [10:28:49<250:52:23, 111.93s/it]

24013


  3%|▎         | 254/8322 [10:33:14<310:06:48, 138.37s/it]

24042


  3%|▎         | 257/8322 [10:35:43<225:30:54, 100.66s/it]

24329


  3%|▎         | 258/8322 [10:39:09<265:07:53, 118.36s/it]

24194


  3%|▎         | 261/8322 [10:42:40<220:00:18, 98.25s/it] 

24268


  3%|▎         | 263/8322 [10:45:01<202:16:03, 90.35s/it]

24005


  3%|▎         | 264/8322 [10:49:17<264:42:18, 118.26s/it]

24132


  3%|▎         | 266/8322 [10:52:57<258:52:47, 115.69s/it]

24316


  3%|▎         | 268/8322 [10:56:12<245:50:56, 109.89s/it]

24153


  3%|▎         | 270/8322 [10:59:43<242:44:55, 108.53s/it]

24146


  3%|▎         | 272/8322 [11:02:53<233:21:08, 104.36s/it]

24338


  3%|▎         | 273/8322 [11:05:43<259:51:48, 116.23s/it]

24269


  3%|▎         | 274/8322 [11:09:17<304:22:26, 136.15s/it]

23990


  3%|▎         | 276/8322 [11:12:30<271:46:02, 121.60s/it]

23952


  3%|▎         | 279/8322 [11:15:20<207:22:51, 92.82s/it] 

24180


  3%|▎         | 286/8322 [11:18:34<120:37:11, 54.04s/it]

24088


  3%|▎         | 288/8322 [11:21:47<139:10:34, 62.36s/it]

23954


  3%|▎         | 289/8322 [11:24:42<170:02:23, 76.20s/it]

24067


  3%|▎         | 290/8322 [11:27:26<199:15:19, 89.31s/it]

24190


  3%|▎         | 291/8322 [11:30:37<238:56:46, 107.11s/it]

24137


  4%|▎         | 294/8322 [11:33:37<193:36:28, 86.82s/it] 

24271


  4%|▎         | 295/8322 [11:36:39<229:39:02, 103.00s/it]

24313


  4%|▎         | 296/8322 [11:39:46<266:21:35, 119.47s/it]

24158


  4%|▎         | 298/8322 [11:44:16<278:31:46, 124.96s/it]

23951


  4%|▎         | 301/8322 [11:47:53<227:28:31, 102.10s/it]

24219


  4%|▎         | 302/8322 [11:51:15<265:42:32, 119.27s/it]

23975


  4%|▎         | 305/8322 [11:55:43<237:21:15, 106.58s/it]

24169


  4%|▎         | 308/8322 [11:58:48<199:40:39, 89.70s/it] 

24150


  4%|▎         | 310/8322 [12:02:23<209:59:25, 94.35s/it]

23969


  4%|▎         | 311/8322 [12:05:16<237:43:22, 106.83s/it]

24000


  4%|▎         | 312/8322 [12:08:23<270:35:47, 121.62s/it]

24633


  4%|▍         | 314/8322 [12:11:38<252:16:46, 113.41s/it]

24412


  4%|▍         | 315/8322 [12:14:25<275:48:44, 124.01s/it]

24610


  4%|▍         | 316/8322 [12:17:46<313:14:15, 140.85s/it]

24460


  4%|▍         | 320/8322 [12:21:05<200:17:55, 90.11s/it] 

24608


  4%|▍         | 321/8322 [12:23:53<229:07:15, 103.09s/it]

24734


  4%|▍         | 322/8322 [12:26:50<260:26:03, 117.20s/it]

24729


  4%|▍         | 323/8322 [12:31:04<325:45:33, 146.61s/it]

24376


  4%|▍         | 329/8322 [12:34:26<162:57:16, 73.39s/it] 

24446


  4%|▍         | 330/8322 [12:39:01<222:43:57, 100.33s/it]

24747


  4%|▍         | 337/8322 [12:42:37<134:23:24, 60.59s/it] 

24352


  4%|▍         | 339/8322 [12:45:47<148:51:48, 67.13s/it]

24687


  4%|▍         | 340/8322 [12:48:29<173:45:56, 78.37s/it]

24396


  4%|▍         | 342/8322 [12:52:04<190:07:41, 85.77s/it]

24374


  4%|▍         | 345/8322 [12:55:10<171:34:48, 77.43s/it]

24361


  4%|▍         | 346/8322 [12:58:02<201:30:30, 90.95s/it]

24450


  4%|▍         | 348/8322 [13:01:21<207:07:29, 93.51s/it]

24419


  4%|▍         | 351/8322 [13:04:05<173:59:50, 78.58s/it]

24468


  4%|▍         | 352/8322 [13:06:36<198:46:27, 89.79s/it]

24696


  4%|▍         | 353/8322 [13:10:36<258:44:01, 116.88s/it]

24415


  4%|▍         | 356/8322 [13:13:18<197:57:34, 89.46s/it] 

24509


  4%|▍         | 357/8322 [13:15:42<218:48:27, 98.90s/it]

24728


  4%|▍         | 360/8322 [13:18:53<185:32:02, 83.89s/it]

24702


  4%|▍         | 362/8322 [13:21:27<181:05:38, 81.90s/it]

24500


  4%|▍         | 363/8322 [13:24:49<226:25:09, 102.41s/it]

24344


  4%|▍         | 366/8322 [13:28:35<200:58:21, 90.94s/it] 

24479


  4%|▍         | 369/8322 [13:31:42<177:08:28, 80.18s/it]

24614


  4%|▍         | 370/8322 [13:34:15<201:49:06, 91.37s/it]

24612


  4%|▍         | 373/8322 [13:36:55<168:29:46, 76.31s/it]

24497


  5%|▍         | 375/8322 [13:42:52<230:11:12, 104.27s/it]

24670


  5%|▍         | 376/8322 [13:45:26<248:01:20, 112.37s/it]

24622


  5%|▍         | 380/8322 [13:48:00<169:08:46, 76.67s/it] 

24750


  5%|▍         | 381/8322 [13:50:44<197:21:22, 89.47s/it]

24488


  5%|▍         | 384/8322 [13:53:17<164:31:17, 74.61s/it]

24755


  5%|▍         | 385/8322 [13:56:38<207:58:51, 94.33s/it]

24689


  5%|▍         | 386/8322 [13:59:15<232:51:06, 105.63s/it]

24621


  5%|▍         | 387/8322 [14:02:39<277:35:37, 125.94s/it]

24738


  5%|▍         | 389/8322 [14:05:36<246:53:29, 112.04s/it]

24508


  5%|▍         | 390/8322 [14:08:07<264:50:30, 120.20s/it]

24552


  5%|▍         | 391/8322 [14:11:28<305:49:35, 138.82s/it]

24705


  5%|▍         | 393/8322 [14:15:33<291:40:50, 132.43s/it]

24722


  5%|▍         | 395/8322 [14:18:43<261:45:44, 118.88s/it]

24462


  5%|▍         | 399/8322 [14:22:03<184:36:10, 83.88s/it] 

25024


  5%|▍         | 404/8322 [14:25:54<145:02:31, 65.94s/it]

24898


  5%|▍         | 406/8322 [14:28:14<146:58:17, 66.84s/it]

24770


  5%|▍         | 410/8322 [14:32:14<141:09:09, 64.23s/it]

24884


  5%|▍         | 411/8322 [14:35:38<177:41:43, 80.86s/it]

24811


  5%|▍         | 412/8322 [14:38:19<203:22:49, 92.56s/it]

24763


  5%|▌         | 418/8322 [14:41:24<127:52:13, 58.24s/it]

24870


  5%|▌         | 420/8322 [14:44:28<143:17:59, 65.28s/it]

24918


  5%|▌         | 421/8322 [14:47:23<174:34:49, 79.55s/it]

24979


  5%|▌         | 422/8322 [14:49:47<196:47:31, 89.68s/it]

24848


  5%|▌         | 425/8322 [14:52:17<161:56:13, 73.82s/it]

24880


  5%|▌         | 429/8322 [14:55:24<136:00:05, 62.03s/it]

24791


  5%|▌         | 432/8322 [14:58:40<138:13:14, 63.07s/it]

24841


  5%|▌         | 435/8322 [15:00:58<126:36:03, 57.79s/it]

24790


  5%|▌         | 436/8322 [15:04:02<162:12:22, 74.05s/it]

24864


  5%|▌         | 438/8322 [15:08:12<192:14:19, 87.78s/it]

24804


  5%|▌         | 440/8322 [15:11:49<204:58:55, 93.62s/it]

24789


  5%|▌         | 442/8322 [15:14:24<194:48:52, 89.00s/it]

24885


  5%|▌         | 444/8322 [15:17:54<205:03:19, 93.70s/it]

24891


  5%|▌         | 445/8322 [15:20:28<227:13:28, 103.85s/it]

24917


  5%|▌         | 446/8322 [15:23:29<260:22:48, 119.02s/it]

24915


  5%|▌         | 449/8322 [15:26:11<195:33:07, 89.42s/it] 

24932


  5%|▌         | 451/8322 [15:29:05<193:53:47, 88.68s/it]

25036


  5%|▌         | 453/8322 [15:31:58<192:20:21, 87.99s/it]

24905


  5%|▌         | 455/8322 [15:35:17<199:51:33, 91.46s/it]

25022


  5%|▌         | 456/8322 [15:37:36<218:21:53, 99.94s/it]

24824


  6%|▌         | 459/8322 [15:41:01<188:25:16, 86.27s/it]

24867


  6%|▌         | 462/8322 [15:45:01<183:01:43, 83.83s/it]

24893


  6%|▌         | 465/8322 [15:47:51<162:08:38, 74.29s/it]

25315


  6%|▌         | 469/8322 [15:51:34<145:48:27, 66.84s/it]

25552


  6%|▌         | 473/8322 [15:53:54<120:21:17, 55.20s/it]

25299


  6%|▌         | 474/8322 [15:59:23<189:05:51, 86.74s/it]

25197


  6%|▌         | 475/8322 [16:03:44<242:48:21, 111.39s/it]

25526


  6%|▌         | 480/8322 [16:06:10<152:42:15, 70.10s/it] 

25180


  6%|▌         | 481/8322 [16:09:28<187:36:04, 86.13s/it]

25079


  6%|▌         | 484/8322 [16:13:02<176:14:49, 80.95s/it]

25219


  6%|▌         | 485/8322 [16:18:05<245:25:33, 112.74s/it]

25135


  6%|▌         | 490/8322 [16:24:53<210:55:13, 96.95s/it] 

25184


  6%|▌         | 491/8322 [16:30:03<269:23:13, 123.84s/it]

25231


  6%|▌         | 494/8322 [16:33:26<226:34:43, 104.20s/it]

25124


  6%|▌         | 496/8322 [16:36:43<223:14:57, 102.70s/it]

25435


  6%|▌         | 497/8322 [16:41:05<275:51:16, 126.91s/it]

25572


  6%|▌         | 498/8322 [16:44:29<305:25:59, 140.54s/it]

25300


  6%|▌         | 500/8322 [16:47:46<274:35:10, 126.38s/it]

25122


  6%|▌         | 502/8322 [16:50:32<244:09:37, 112.40s/it]

25176


  6%|▌         | 508/8322 [16:53:26<138:32:10, 63.83s/it] 

25322


  6%|▌         | 511/8322 [16:58:05<157:06:06, 72.41s/it]

25250


  6%|▌         | 513/8322 [17:01:34<172:08:11, 79.36s/it]

25133


  6%|▌         | 514/8322 [17:04:58<208:48:51, 96.28s/it]

25285


  6%|▌         | 516/8322 [17:07:34<197:37:02, 91.14s/it]

25177


  6%|▌         | 520/8322 [17:10:42<155:01:42, 71.53s/it]

25408


  6%|▋         | 522/8322 [17:13:26<160:35:58, 74.12s/it]

25372


  6%|▋         | 526/8322 [17:16:13<131:59:08, 60.95s/it]

25194


  6%|▋         | 527/8322 [17:18:56<159:50:46, 73.82s/it]

25141


  6%|▋         | 529/8322 [17:21:17<157:58:51, 72.98s/it]

25207


  6%|▋         | 531/8322 [17:24:37<173:56:25, 80.37s/it]

25520


  6%|▋         | 533/8322 [17:27:49<183:41:23, 84.90s/it]

25144


  6%|▋         | 534/8322 [17:30:53<219:28:28, 101.45s/it]

25414


  6%|▋         | 535/8322 [17:34:10<259:27:21, 119.95s/it]

25573


  6%|▋         | 536/8322 [17:36:54<280:04:18, 129.50s/it]

25560


  6%|▋         | 538/8322 [17:40:00<249:57:33, 115.60s/it]

25508


  7%|▋         | 542/8322 [17:43:03<170:51:55, 79.06s/it] 

25279


  7%|▋         | 549/8322 [17:46:36<111:20:08, 51.56s/it]

25200


  7%|▋         | 553/8322 [17:50:14<113:15:28, 52.48s/it]

25121


  7%|▋         | 555/8322 [17:53:07<126:48:57, 58.78s/it]

25325


  7%|▋         | 557/8322 [17:55:35<133:32:33, 61.91s/it]

25333


  7%|▋         | 558/8322 [17:58:34<166:09:21, 77.04s/it]

25162


  7%|▋         | 561/8322 [18:01:43<155:18:26, 72.04s/it]

25264


  7%|▋         | 562/8322 [18:05:39<206:34:48, 95.84s/it]

25331


  7%|▋         | 563/8322 [18:09:09<248:55:47, 115.50s/it]

25349


  7%|▋         | 564/8322 [18:12:36<287:55:06, 133.60s/it]

25418


  7%|▋         | 567/8322 [18:15:27<212:11:18, 98.50s/it] 

25130


  7%|▋         | 568/8322 [18:18:49<251:59:10, 116.99s/it]

25701


  7%|▋         | 570/8322 [18:22:01<236:35:28, 109.87s/it]

25856


  7%|▋         | 571/8322 [18:25:23<275:16:37, 127.85s/it]

25634


  7%|▋         | 574/8322 [18:28:32<211:35:31, 98.31s/it] 

25619


  7%|▋         | 577/8322 [18:31:37<180:40:13, 83.98s/it]

25710


  7%|▋         | 578/8322 [18:35:21<228:07:10, 106.05s/it]

25632


  7%|▋         | 580/8322 [18:38:48<226:26:02, 105.29s/it]

25631


  7%|▋         | 582/8322 [18:42:05<221:47:54, 103.16s/it]

25847


  7%|▋         | 587/8322 [18:45:59<158:19:23, 73.69s/it] 

25610


  7%|▋         | 590/8322 [18:50:08<164:24:55, 76.55s/it]

25605


  7%|▋         | 591/8322 [18:52:17<178:43:06, 83.22s/it]

25855


  7%|▋         | 594/8322 [18:55:22<162:11:53, 75.56s/it]

25614


  7%|▋         | 595/8322 [19:00:08<227:35:27, 106.03s/it]

25809


  7%|▋         | 598/8322 [19:02:50<184:42:29, 86.09s/it] 

25687


  7%|▋         | 599/8322 [19:05:30<209:12:43, 97.52s/it]

25766


  7%|▋         | 601/8322 [19:08:10<197:37:54, 92.15s/it]

25674


  7%|▋         | 606/8322 [19:11:06<133:55:43, 62.49s/it]

25802


  7%|▋         | 608/8322 [19:14:26<152:16:25, 71.06s/it]

25671


  7%|▋         | 610/8322 [19:17:03<156:12:01, 72.92s/it]

25590


  7%|▋         | 612/8322 [19:19:46<160:53:44, 75.13s/it]

25705


  7%|▋         | 614/8322 [19:22:50<170:34:16, 79.66s/it]

25824


  7%|▋         | 616/8322 [19:26:26<187:36:39, 87.65s/it]

25787


  7%|▋         | 617/8322 [19:29:16<217:04:15, 101.42s/it]

25833


  7%|▋         | 622/8322 [19:33:17<154:54:14, 72.42s/it] 

25857


  8%|▊         | 625/8322 [19:36:13<145:41:15, 68.14s/it]

25630


  8%|▊         | 629/8322 [19:40:05<137:20:54, 64.27s/it]

25616


  8%|▊         | 642/8322 [19:43:25<70:47:38, 33.18s/it] 

25773


  8%|▊         | 643/8322 [19:46:03<88:07:41, 41.32s/it]

25613


  8%|▊         | 644/8322 [19:49:51<122:06:16, 57.25s/it]

25654


  8%|▊         | 645/8322 [19:52:36<147:00:41, 68.94s/it]

25589


  8%|▊         | 646/8322 [19:55:39<179:47:35, 84.32s/it]

26132


  8%|▊         | 648/8322 [19:58:48<185:48:04, 87.16s/it]

25931


  8%|▊         | 651/8322 [20:01:35<160:35:13, 75.36s/it]

26159


  8%|▊         | 653/8322 [20:04:14<162:54:16, 76.47s/it]

26331


  8%|▊         | 656/8322 [20:06:59<146:31:07, 68.81s/it]

26136


  8%|▊         | 663/8322 [20:09:07<87:47:07, 41.26s/it] 

26160


  8%|▊         | 666/8322 [20:11:45<93:46:26, 44.09s/it]

26038


  8%|▊         | 669/8322 [20:15:25<110:08:30, 51.81s/it]

26259


  8%|▊         | 672/8322 [20:19:26<126:34:52, 59.57s/it]

26067


  8%|▊         | 673/8322 [20:22:37<158:49:15, 74.75s/it]

25885


  8%|▊         | 680/8322 [20:26:47<114:19:06, 53.85s/it]

26074


  8%|▊         | 687/8322 [20:29:30<86:05:34, 40.59s/it] 

26284


  8%|▊         | 688/8322 [20:32:22<108:44:47, 51.28s/it]

26291


  8%|▊         | 689/8322 [20:35:32<139:12:52, 65.66s/it]

26002


  8%|▊         | 696/8322 [20:38:05<91:54:55, 43.39s/it] 

25940


  8%|▊         | 698/8322 [20:41:06<109:01:54, 51.48s/it]

25957


  8%|▊         | 701/8322 [20:43:57<112:04:01, 52.94s/it]

26012


  8%|▊         | 702/8322 [20:47:41<153:14:57, 72.40s/it]

26178


  8%|▊         | 703/8322 [20:50:39<184:22:38, 87.12s/it]

26194


  8%|▊         | 705/8322 [20:53:25<181:57:24, 86.00s/it]

26163


  8%|▊         | 706/8322 [20:56:48<223:46:40, 105.78s/it]

26240


  9%|▊         | 710/8322 [20:59:49<160:51:09, 76.07s/it] 

26317


  9%|▊         | 711/8322 [21:02:12<181:47:34, 85.99s/it]

26174


  9%|▊         | 716/8322 [21:05:05<125:36:01, 59.45s/it]

26095


  9%|▊         | 717/8322 [21:07:58<156:29:45, 74.08s/it]

26021


  9%|▊         | 718/8322 [21:10:52<189:20:11, 89.64s/it]

26024


  9%|▊         | 722/8322 [21:13:28<138:57:51, 65.83s/it]

25860


  9%|▊         | 723/8322 [21:17:09<186:04:03, 88.15s/it]

26129


  9%|▊         | 725/8322 [21:21:18<208:19:11, 98.72s/it]

25991


  9%|▉         | 733/8322 [21:23:51<103:21:36, 49.03s/it]

26084


  9%|▉         | 737/8322 [21:27:28<106:39:03, 50.62s/it]

26027


  9%|▉         | 742/8322 [21:29:27<86:33:39, 41.11s/it] 

26537


  9%|▉         | 746/8322 [21:33:08<94:58:43, 45.13s/it]

26489


  9%|▉         | 750/8322 [21:36:18<96:26:07, 45.85s/it]

26589


  9%|▉         | 753/8322 [21:38:46<98:10:30, 46.69s/it]

26545


  9%|▉         | 767/8322 [21:41:32<53:06:38, 25.31s/it]

26394


  9%|▉         | 772/8322 [21:44:48<60:04:42, 28.65s/it]

26526


  9%|▉         | 773/8322 [21:47:56<81:19:34, 38.78s/it]

26409


  9%|▉         | 783/8322 [21:51:01<60:56:56, 29.10s/it]

26437


  9%|▉         | 786/8322 [21:53:47<70:18:43, 33.59s/it]

26411


  9%|▉         | 787/8322 [21:56:14<88:00:43, 42.05s/it]

26560


 10%|▉         | 791/8322 [21:59:08<88:46:24, 42.44s/it]

26601


 10%|▉         | 793/8322 [22:02:07<106:06:42, 50.74s/it]

26593


 10%|▉         | 794/8322 [22:04:38<129:38:03, 61.99s/it]

26532


 10%|▉         | 796/8322 [22:07:21<139:25:20, 66.69s/it]

26405


 10%|▉         | 799/8322 [22:10:16<133:23:49, 63.83s/it]

26585


 10%|▉         | 803/8322 [22:13:14<117:24:59, 56.22s/it]

26491


 10%|▉         | 804/8322 [22:20:09<209:54:29, 100.51s/it]

26514


 10%|▉         | 805/8322 [22:23:05<233:18:28, 111.73s/it]

26749


 10%|▉         | 809/8322 [22:25:19<158:02:42, 75.73s/it] 

26662


 10%|▉         | 810/8322 [22:28:04<184:09:41, 88.26s/it]

11427


 10%|▉         | 812/8322 [22:30:58<183:24:00, 87.91s/it]

26685


 10%|▉         | 813/8322 [22:33:01<196:00:46, 93.97s/it]

11436


 10%|▉         | 819/8322 [22:36:01<116:36:48, 55.95s/it]

11387


 10%|▉         | 825/8322 [22:39:12<93:32:43, 44.92s/it] 

11290


 10%|▉         | 828/8322 [22:42:52<108:07:00, 51.94s/it]

11314


 10%|█         | 833/8322 [22:45:34<92:58:55, 44.70s/it] 

11206


 10%|█         | 842/8322 [22:49:03<71:04:40, 34.21s/it]

26738


 10%|█         | 844/8322 [22:53:45<100:47:34, 48.52s/it]

11378


 10%|█         | 851/8322 [22:57:37<87:56:53, 42.38s/it] 

11219


 10%|█         | 854/8322 [23:00:25<93:22:32, 45.01s/it]

11205


 10%|█         | 856/8322 [23:04:02<114:14:54, 55.09s/it]

11216


 10%|█         | 859/8322 [23:06:19<109:17:23, 52.72s/it]

11224


 10%|█         | 873/8322 [23:08:59<55:24:11, 26.78s/it] 

26649


 11%|█         | 874/8322 [23:11:38<71:49:41, 34.72s/it]

11325


 11%|█         | 876/8322 [23:14:48<90:06:34, 43.57s/it]

11283


 11%|█         | 880/8322 [23:17:38<89:25:01, 43.25s/it]

26729


 11%|█         | 883/8322 [23:21:06<102:22:59, 49.55s/it]

11416


 11%|█         | 884/8322 [23:24:31<135:23:22, 65.53s/it]

11277


 11%|█         | 889/8322 [23:27:00<104:04:46, 50.41s/it]

11279


 11%|█         | 891/8322 [23:30:43<128:40:48, 62.34s/it]

11388


 11%|█         | 892/8322 [23:33:00<147:29:23, 71.46s/it]

11447


 11%|█         | 893/8322 [23:36:00<180:45:32, 87.59s/it]

11521


 11%|█         | 897/8322 [23:39:43<150:26:01, 72.94s/it]

11570


 11%|█         | 899/8322 [23:42:42<158:46:45, 77.00s/it]

11797


 11%|█         | 902/8322 [23:45:22<141:53:49, 68.84s/it]

11854


 11%|█         | 904/8322 [23:48:16<150:58:15, 73.27s/it]

11857


 11%|█         | 907/8322 [23:51:18<142:05:02, 68.98s/it]

11728


 11%|█         | 911/8322 [23:54:18<122:15:49, 59.39s/it]

11867


 11%|█         | 918/8322 [23:56:20<79:05:25, 38.46s/it] 

11835


 11%|█         | 920/8322 [23:58:58<93:14:40, 45.35s/it]

11837


 11%|█         | 921/8322 [24:01:51<121:30:05, 59.10s/it]

11625


 11%|█         | 922/8322 [24:04:48<153:44:27, 74.79s/it]

11891


 11%|█         | 925/8322 [24:07:40<140:38:56, 68.45s/it]

11742


 11%|█         | 926/8322 [24:10:29<171:17:54, 83.38s/it]

11890


 11%|█         | 935/8322 [24:13:12<83:20:58, 40.62s/it] 

11704


 11%|█         | 936/8322 [24:16:13<110:21:14, 53.79s/it]

11712


 11%|█▏        | 943/8322 [24:19:02<80:47:02, 39.41s/it] 

11882


 11%|█▏        | 944/8322 [24:22:10<108:16:14, 52.83s/it]

11608


 11%|█▏        | 945/8322 [24:24:52<133:44:01, 65.26s/it]

11525


 11%|█▏        | 948/8322 [24:27:45<128:28:46, 62.72s/it]

11720


 11%|█▏        | 955/8322 [24:30:24<85:35:26, 41.83s/it] 

11851


 12%|█▏        | 961/8322 [24:34:07<81:53:59, 40.05s/it]

11451


 12%|█▏        | 963/8322 [24:38:40<112:38:53, 55.11s/it]

12110


 12%|█▏        | 969/8322 [24:42:01<94:52:37, 46.45s/it] 

12106


 12%|█▏        | 972/8322 [24:46:12<111:43:15, 54.72s/it]

12239


 12%|█▏        | 973/8322 [24:49:28<139:31:16, 68.35s/it]

11910


 12%|█▏        | 975/8322 [24:52:29<149:13:36, 73.12s/it]

12056


 12%|█▏        | 977/8322 [24:56:43<174:52:45, 85.71s/it]

12191


 12%|█▏        | 979/8322 [24:59:16<170:10:26, 83.43s/it]

12082


 12%|█▏        | 986/8322 [25:02:36<107:45:36, 52.88s/it]

12075


 12%|█▏        | 987/8322 [25:05:44<135:55:21, 66.71s/it]

12256


 12%|█▏        | 994/8322 [25:08:17<89:39:08, 44.04s/it] 

12226


 12%|█▏        | 1000/8322 [25:10:41<74:00:30, 36.39s/it]

12228


 12%|█▏        | 1003/8322 [25:14:08<88:05:19, 43.33s/it]

12078


 12%|█▏        | 1004/8322 [25:16:42<108:53:46, 53.57s/it]

11985


 12%|█▏        | 1008/8322 [25:19:51<104:32:56, 51.46s/it]

12192


 12%|█▏        | 1012/8322 [25:22:55<100:44:02, 49.61s/it]

12087


 12%|█▏        | 1018/8322 [25:26:00<84:50:26, 41.82s/it] 

12042


 12%|█▏        | 1026/8322 [25:29:47<72:42:26, 35.88s/it]

12253


 12%|█▏        | 1027/8322 [25:32:57<95:33:07, 47.15s/it]

12029


 12%|█▏        | 1028/8322 [25:35:12<112:18:29, 55.43s/it]

12074


 12%|█▏        | 1029/8322 [25:37:12<127:50:23, 63.10s/it]

12281


 12%|█▏        | 1032/8322 [25:41:25<142:21:28, 70.30s/it]

11963


 12%|█▏        | 1039/8322 [25:42:05<180:09:30, 89.05s/it]


KeyboardInterrupt: 

In [85]:
pickle.dump(result_dict_story_gen_emb_tfidf_novel_method_w_distributed_attention, open('25_result_dict_story_gen_emb_tfidf_novel_method_w_distributed_attention_v128.p', 'wb'))

In [86]:
# seed_documents_for_experiments = list(result_dict_story_gen_emb_tfidf_novel_method_w_distributed_attention.keys())

In [93]:
# pickle.dump(title_id_dict, open('25_title_id_dict_v128.p', 'wb'))
# pickle.dump(abstract_id_dict, open('25_abstract_id_dict_v128.p', 'wb'))
# pickle.dump(text_id_dict, open('25_text_id_dict_v128.p', 'wb'))
# pickle.dump(date_id_dict, open('25_date_id_dict_v128.p', 'wb'))
# pickle.dump(tf_idf_dict, open('25_tf_idf_dict_v128.p', 'wb'))
# pickle.dump(data_timestamps, open('25_data_timestamps_v128.p', 'wb'))
# pickle.dump(contextual_doc_embedding, open('25_contextual_doc_embedding_v128.p', 'wb'))
# # pickle.dump(contextual_bert_embedding, open('25_contextual_bert_embedding_v128.p', 'wb'))
# pickle.dump(contextual_dict, open('25_contextual_dict_v128.p','wb'))
# pickle.dump(list_of_words_in_tfidf, open('25_list_of_words_in_tfidf_v128.p', 'wb'))
# pickle.dump(seed_documents_for_experiments, open('25_seed_documents_for_experiments_v128.p', 'wb'))

In [91]:
# keyword based model
seen_doc = set()
result_dict_story_gen_just_tfidf_novel_method = dict()
for se in tqdm(seed_documents_for_experiments):
    if se in seen_doc:
        continue
    print(se)
    res = story_gen_just_tfidf_novel_method(se, collect_ids, min_acceptable_similarity= 0.94, min_keyword_overlapping= 0.6) 
    result_dict_story_gen_just_tfidf_novel_method[se] = res
    for d in res:
        seen_doc.add(d)

  0%|          | 0/471 [00:00<?, ?it/s]

22989


  0%|          | 1/471 [00:59<7:44:01, 59.24s/it]

23079


  0%|          | 2/471 [01:58<7:42:21, 59.15s/it]

23002


  1%|          | 3/471 [02:57<7:40:56, 59.09s/it]

23124


  1%|          | 4/471 [03:56<7:39:54, 59.09s/it]

23131


  1%|          | 5/471 [04:54<7:36:45, 58.81s/it]

23119


  1%|▏         | 6/471 [05:51<7:31:11, 58.22s/it]

23192


  1%|▏         | 7/471 [06:49<7:28:39, 58.02s/it]

23066


  2%|▏         | 8/471 [07:46<7:26:22, 57.85s/it]

22988


  2%|▏         | 9/471 [08:43<7:23:31, 57.60s/it]

23159


  2%|▏         | 10/471 [09:41<7:22:37, 57.61s/it]

23107


  2%|▏         | 11/471 [10:38<7:20:31, 57.46s/it]

23022


  3%|▎         | 12/471 [11:35<7:18:21, 57.30s/it]

23045


  3%|▎         | 13/471 [12:32<7:16:53, 57.23s/it]

23110


  3%|▎         | 14/471 [13:29<7:15:15, 57.15s/it]

23084


  3%|▎         | 15/471 [14:27<7:14:56, 57.23s/it]

23017


  3%|▎         | 16/471 [15:23<7:12:45, 57.07s/it]

23036


  4%|▎         | 17/471 [16:21<7:12:10, 57.12s/it]

23023


  4%|▍         | 18/471 [17:18<7:11:23, 57.14s/it]

23005


  4%|▍         | 19/471 [18:15<7:10:57, 57.21s/it]

23123


  4%|▍         | 20/471 [19:12<7:09:18, 57.11s/it]

23061


  4%|▍         | 21/471 [20:09<7:07:40, 57.02s/it]

23121


  5%|▍         | 22/471 [21:06<7:07:29, 57.12s/it]

23073


  5%|▍         | 23/471 [22:03<7:06:10, 57.08s/it]

23020


  5%|▌         | 24/471 [23:01<7:06:03, 57.19s/it]

23163


  5%|▌         | 25/471 [23:58<7:05:33, 57.25s/it]

23177


  6%|▌         | 26/471 [24:56<7:05:18, 57.34s/it]

23077


  6%|▌         | 27/471 [25:53<7:04:37, 57.38s/it]

23130


  6%|▌         | 28/471 [26:50<7:02:55, 57.28s/it]

23188


  6%|▌         | 29/471 [27:47<7:02:22, 57.34s/it]

23054


  6%|▋         | 30/471 [28:45<7:02:14, 57.45s/it]

22990


  7%|▋         | 31/471 [29:43<7:01:04, 57.42s/it]

23189


  7%|▋         | 32/471 [30:40<7:00:02, 57.41s/it]

23115


  7%|▋         | 33/471 [31:37<6:58:36, 57.34s/it]

23040


  7%|▋         | 34/471 [32:34<6:57:24, 57.31s/it]

23013


  7%|▋         | 35/471 [33:32<6:57:00, 57.39s/it]

23185


  8%|▊         | 36/471 [34:29<6:55:32, 57.32s/it]

23126


  8%|▊         | 37/471 [35:27<6:55:02, 57.38s/it]

22997


  8%|▊         | 38/471 [36:24<6:53:13, 57.26s/it]

23060


  8%|▊         | 39/471 [37:21<6:52:07, 57.24s/it]

23041


  8%|▊         | 40/471 [38:18<6:50:32, 57.15s/it]

22986


  9%|▊         | 41/471 [39:15<6:50:34, 57.29s/it]

23004


  9%|▉         | 42/471 [40:12<6:49:19, 57.25s/it]

23137


  9%|▉         | 43/471 [41:09<6:47:49, 57.17s/it]

23057


  9%|▉         | 44/471 [42:07<6:46:38, 57.14s/it]

23136


 10%|▉         | 45/471 [43:04<6:46:39, 57.27s/it]

23068


 10%|▉         | 46/471 [44:01<6:44:45, 57.14s/it]

23064


 10%|▉         | 47/471 [44:58<6:43:59, 57.17s/it]

23146


 10%|█         | 48/471 [45:55<6:43:08, 57.18s/it]

23166


 10%|█         | 49/471 [46:52<6:41:49, 57.13s/it]

23055


 11%|█         | 50/471 [47:49<6:40:23, 57.06s/it]

23026


 11%|█         | 51/471 [48:47<6:39:59, 57.14s/it]

23018


 11%|█         | 52/471 [49:44<6:38:53, 57.12s/it]

22996


 11%|█▏        | 53/471 [50:41<6:37:56, 57.12s/it]

22998


 11%|█▏        | 54/471 [51:38<6:37:26, 57.19s/it]

23006


 12%|█▏        | 55/471 [52:35<6:36:20, 57.17s/it]

23167


 12%|█▏        | 56/471 [53:32<6:35:11, 57.14s/it]

22983


 12%|█▏        | 57/471 [54:30<6:34:18, 57.15s/it]

23080


 12%|█▏        | 58/471 [55:27<6:33:22, 57.15s/it]

23031


 13%|█▎        | 59/471 [56:24<6:31:53, 57.07s/it]

23035


 13%|█▎        | 60/471 [57:21<6:31:01, 57.08s/it]

22994


 13%|█▎        | 61/471 [58:18<6:29:55, 57.06s/it]

23147


 13%|█▎        | 62/471 [59:15<6:30:00, 57.21s/it]

23122


 13%|█▎        | 63/471 [1:00:12<6:28:49, 57.18s/it]

23104


 14%|█▎        | 64/471 [1:01:10<6:29:12, 57.38s/it]

23525


 14%|█▍        | 65/471 [1:02:06<6:25:43, 57.00s/it]

23565


 14%|█▍        | 66/471 [1:03:03<6:23:14, 56.78s/it]

23312


 14%|█▍        | 68/471 [1:03:59<4:52:54, 43.61s/it]

23400


 15%|█▍        | 69/471 [1:04:56<5:14:25, 46.93s/it]

23344


 15%|█▍        | 70/471 [1:05:53<5:31:15, 49.57s/it]

23461


 15%|█▌        | 71/471 [1:06:50<5:43:29, 51.52s/it]

23331


 15%|█▌        | 72/471 [1:07:46<5:51:57, 52.93s/it]

23197


 15%|█▌        | 73/471 [1:08:43<5:58:12, 54.00s/it]

23298


 16%|█▌        | 74/471 [1:09:40<6:02:59, 54.86s/it]

23551


 16%|█▌        | 75/471 [1:10:37<6:05:34, 55.39s/it]

23481


 16%|█▋        | 77/471 [1:11:34<4:42:59, 43.09s/it]

23548


 17%|█▋        | 78/471 [1:12:31<5:04:44, 46.53s/it]

23466


 17%|█▋        | 79/471 [1:13:27<5:21:01, 49.14s/it]

23474


 17%|█▋        | 80/471 [1:14:24<5:33:35, 51.19s/it]

23553


 17%|█▋        | 81/471 [1:15:20<5:42:13, 52.65s/it]

23431


 17%|█▋        | 82/471 [1:16:16<5:47:37, 53.62s/it]

23206


 18%|█▊        | 84/471 [1:17:13<4:32:39, 42.27s/it]

23395


 18%|█▊        | 86/471 [1:18:09<3:55:55, 36.77s/it]

23291


 18%|█▊        | 87/471 [1:19:06<4:22:53, 41.08s/it]

23569


 19%|█▊        | 88/471 [1:20:03<4:46:08, 44.83s/it]

23624


 19%|█▉        | 89/471 [1:21:00<5:04:42, 47.86s/it]

23511


 19%|█▉        | 90/471 [1:21:56<5:18:10, 50.11s/it]

23296


 19%|█▉        | 91/471 [1:22:52<5:28:37, 51.89s/it]

23397


 20%|█▉        | 92/471 [1:23:49<5:35:30, 53.12s/it]

23248


 20%|█▉        | 93/471 [1:24:45<5:40:42, 54.08s/it]

23570


 20%|█▉        | 94/471 [1:25:42<5:44:02, 54.76s/it]

23442


 20%|██        | 95/471 [1:26:38<5:46:26, 55.28s/it]

23460


 20%|██        | 96/471 [1:27:34<5:46:51, 55.50s/it]

23193


 21%|██        | 97/471 [1:28:31<5:47:46, 55.79s/it]

23608


 21%|██        | 98/471 [1:29:28<5:49:04, 56.15s/it]

23199


 21%|██        | 99/471 [1:30:24<5:48:51, 56.27s/it]

23550


 21%|██        | 100/471 [1:31:20<5:47:59, 56.28s/it]

23417


 21%|██▏       | 101/471 [1:32:17<5:47:00, 56.27s/it]

23266


 22%|██▏       | 102/471 [1:33:13<5:46:32, 56.35s/it]

23480


 22%|██▏       | 103/471 [1:34:10<5:45:57, 56.41s/it]

23613


 22%|██▏       | 105/471 [1:35:06<4:24:50, 43.42s/it]

23581


 23%|██▎       | 106/471 [1:36:03<4:43:39, 46.63s/it]

23313


 23%|██▎       | 107/471 [1:36:59<4:58:50, 49.26s/it]

23618


 23%|██▎       | 108/471 [1:37:56<5:10:21, 51.30s/it]

23428


 23%|██▎       | 109/471 [1:38:53<5:18:54, 52.86s/it]

23383


 23%|██▎       | 110/471 [1:39:49<5:24:12, 53.89s/it]

23458


 24%|██▎       | 111/471 [1:40:46<5:28:26, 54.74s/it]

23216


 24%|██▍       | 113/471 [1:41:43<4:14:56, 42.73s/it]

23468


 24%|██▍       | 114/471 [1:42:39<4:33:54, 46.04s/it]

23260


 24%|██▍       | 115/471 [1:43:36<4:49:23, 48.77s/it]

23443


 25%|██▍       | 116/471 [1:44:32<5:00:41, 50.82s/it]

23541


 25%|██▍       | 117/471 [1:45:29<5:09:31, 52.46s/it]

23487


 25%|██▌       | 119/471 [1:46:25<4:04:28, 41.67s/it]

23252


 26%|██▌       | 121/471 [1:47:21<3:32:05, 36.36s/it]

23445


 26%|██▌       | 122/471 [1:48:17<3:56:33, 40.67s/it]

23337


 26%|██▌       | 123/471 [1:49:13<4:17:09, 44.34s/it]

23210


 26%|██▋       | 124/471 [1:50:09<4:33:32, 47.30s/it]

23603


 27%|██▋       | 125/471 [1:51:05<4:46:21, 49.66s/it]

23749


 27%|██▋       | 126/471 [1:52:01<4:55:27, 51.38s/it]

23723


 27%|██▋       | 127/471 [1:52:57<5:01:37, 52.61s/it]

23776


 27%|██▋       | 129/471 [1:53:53<3:56:52, 41.56s/it]

23664


 28%|██▊       | 131/471 [1:54:49<3:25:19, 36.24s/it]

23899


 28%|██▊       | 132/471 [1:55:44<3:48:41, 40.48s/it]

23887


 28%|██▊       | 133/471 [1:56:41<4:09:08, 44.23s/it]

23854


 29%|██▊       | 135/471 [1:57:37<3:31:22, 37.75s/it]

23756


 29%|██▉       | 136/471 [1:58:33<3:53:37, 41.84s/it]

23810


 29%|██▉       | 137/471 [1:59:29<4:11:57, 45.26s/it]

23688


 29%|██▉       | 138/471 [2:00:25<4:26:08, 47.95s/it]

23902


 30%|██▉       | 139/471 [2:01:21<4:37:03, 50.07s/it]

23845


 30%|██▉       | 140/471 [2:02:16<4:44:43, 51.61s/it]

23721


 30%|███       | 142/471 [2:03:12<3:45:29, 41.12s/it]

23859


 30%|███       | 143/471 [2:04:08<4:04:26, 44.71s/it]

23625


 31%|███       | 145/471 [2:05:04<3:25:56, 37.90s/it]

23833


 31%|███       | 146/471 [2:06:00<3:47:19, 41.97s/it]

23862


 31%|███       | 147/471 [2:06:56<4:04:44, 45.32s/it]

23634


 31%|███▏      | 148/471 [2:07:52<4:18:48, 48.08s/it]

23827


 32%|███▏      | 149/471 [2:08:48<4:29:31, 50.22s/it]

23631


 32%|███▏      | 150/471 [2:09:43<4:36:55, 51.76s/it]

23916


 32%|███▏      | 151/471 [2:10:40<4:42:32, 52.98s/it]

23895


 32%|███▏      | 152/471 [2:11:36<4:46:18, 53.85s/it]

23733


 32%|███▏      | 153/471 [2:12:31<4:48:22, 54.41s/it]

23628


 33%|███▎      | 154/471 [2:13:28<4:50:46, 55.04s/it]

23788


 33%|███▎      | 155/471 [2:14:24<4:51:46, 55.40s/it]

23901


 33%|███▎      | 156/471 [2:15:20<4:51:49, 55.59s/it]

23638


 33%|███▎      | 157/471 [2:16:17<4:52:44, 55.94s/it]

23826


 34%|███▎      | 158/471 [2:17:13<4:52:02, 55.98s/it]

23643


 34%|███▍      | 159/471 [2:18:09<4:51:05, 55.98s/it]

23705


 34%|███▍      | 160/471 [2:19:05<4:50:09, 55.98s/it]

23875


 34%|███▍      | 161/471 [2:20:01<4:49:00, 55.94s/it]

23831


 34%|███▍      | 162/471 [2:20:57<4:48:28, 56.02s/it]

23871


 35%|███▍      | 164/471 [2:21:53<3:40:00, 43.00s/it]

23828


 35%|███▌      | 165/471 [2:22:48<3:55:38, 46.20s/it]

23794


 35%|███▌      | 166/471 [2:23:44<4:07:48, 48.75s/it]

23777


 35%|███▌      | 167/471 [2:24:41<4:17:18, 50.78s/it]

23735


 36%|███▌      | 168/471 [2:25:36<4:23:31, 52.18s/it]

23896


 36%|███▌      | 169/471 [2:26:33<4:28:39, 53.37s/it]

23714


 36%|███▌      | 170/471 [2:27:29<4:31:15, 54.07s/it]

23829


 36%|███▋      | 171/471 [2:28:24<4:32:50, 54.57s/it]

24089


 37%|███▋      | 172/471 [2:29:20<4:33:16, 54.84s/it]

24025


 37%|███▋      | 173/471 [2:30:15<4:33:26, 55.06s/it]

24075


 37%|███▋      | 174/471 [2:31:11<4:32:47, 55.11s/it]

23941


 37%|███▋      | 175/471 [2:32:06<4:32:52, 55.31s/it]

24125


 37%|███▋      | 176/471 [2:33:02<4:31:55, 55.31s/it]

24061


 38%|███▊      | 177/471 [2:33:57<4:31:30, 55.41s/it]

24193


 38%|███▊      | 178/471 [2:34:53<4:30:35, 55.41s/it]

24026


 38%|███▊      | 179/471 [2:35:48<4:30:06, 55.50s/it]

24325


 38%|███▊      | 180/471 [2:36:44<4:29:05, 55.48s/it]

24303


 38%|███▊      | 181/471 [2:37:39<4:27:49, 55.41s/it]

24239


 39%|███▊      | 182/471 [2:38:34<4:26:38, 55.36s/it]

24148


 39%|███▉      | 183/471 [2:39:29<4:25:18, 55.27s/it]

24240


 39%|███▉      | 184/471 [2:40:25<4:24:36, 55.32s/it]

24013


 39%|███▉      | 185/471 [2:41:20<4:23:08, 55.21s/it]

24042


 39%|███▉      | 186/471 [2:42:15<4:22:28, 55.26s/it]

24194


 40%|███▉      | 188/471 [2:43:11<3:20:53, 42.59s/it]

24268


 40%|████      | 189/471 [2:44:07<3:35:23, 45.83s/it]

24132


 41%|████      | 191/471 [2:45:02<2:59:10, 38.39s/it]

24316


 41%|████      | 192/471 [2:45:58<3:16:50, 42.33s/it]

24153


 41%|████      | 193/471 [2:46:53<3:30:52, 45.51s/it]

24146


 41%|████      | 194/471 [2:47:49<3:42:10, 48.13s/it]

24338


 41%|████▏     | 195/471 [2:48:44<3:50:13, 50.05s/it]

23990


 42%|████▏     | 197/471 [2:49:39<3:03:56, 40.28s/it]

23952


 42%|████▏     | 198/471 [2:50:34<3:19:18, 43.80s/it]

24180


 42%|████▏     | 199/471 [2:51:30<3:31:46, 46.72s/it]

24088


 42%|████▏     | 200/471 [2:52:25<3:41:23, 49.02s/it]

23954


 43%|████▎     | 201/471 [2:53:21<3:48:45, 50.83s/it]

24067


 43%|████▎     | 202/471 [2:54:16<3:54:04, 52.21s/it]

24190


 43%|████▎     | 203/471 [2:55:12<3:57:04, 53.08s/it]

24137


 43%|████▎     | 204/471 [2:56:07<3:59:07, 53.73s/it]

24271


 44%|████▎     | 205/471 [2:57:02<4:00:34, 54.26s/it]

24313


 44%|████▎     | 206/471 [2:57:58<4:00:58, 54.56s/it]

23951


 44%|████▍     | 208/471 [2:58:53<3:05:03, 42.22s/it]

24219


 44%|████▍     | 209/471 [2:59:48<3:18:30, 45.46s/it]

23975


 45%|████▍     | 210/471 [3:00:44<3:29:28, 48.16s/it]

24169


 45%|████▍     | 211/471 [3:01:40<3:37:10, 50.12s/it]

24150


 45%|████▌     | 212/471 [3:02:35<3:42:47, 51.61s/it]

23969


 45%|████▌     | 213/471 [3:03:31<3:46:47, 52.74s/it]

24000


 45%|████▌     | 214/471 [3:04:27<3:49:53, 53.67s/it]

24633


 46%|████▌     | 215/471 [3:05:21<3:50:26, 54.01s/it]

24610


 46%|████▌     | 217/471 [3:06:16<2:57:08, 41.84s/it]

24608


 46%|████▋     | 219/471 [3:07:11<2:31:47, 36.14s/it]

24734


 47%|████▋     | 220/471 [3:08:05<2:48:08, 40.19s/it]

24729


 47%|████▋     | 221/471 [3:09:00<3:01:30, 43.56s/it]

24376


 47%|████▋     | 222/471 [3:09:55<3:12:51, 46.47s/it]

24446


 47%|████▋     | 223/471 [3:10:49<3:21:03, 48.64s/it]

24747


 48%|████▊     | 224/471 [3:11:44<3:26:58, 50.28s/it]

24352


 48%|████▊     | 225/471 [3:12:39<3:31:48, 51.66s/it]

24687


 48%|████▊     | 226/471 [3:13:34<3:35:20, 52.74s/it]

24396


 48%|████▊     | 227/471 [3:14:29<3:36:58, 53.35s/it]

24374


 48%|████▊     | 228/471 [3:15:24<3:37:44, 53.77s/it]

24361


 49%|████▊     | 229/471 [3:16:19<3:37:51, 54.01s/it]

24450


 49%|████▉     | 230/471 [3:17:13<3:37:26, 54.13s/it]

24419


 49%|████▉     | 231/471 [3:18:08<3:37:15, 54.31s/it]

24468


 49%|████▉     | 232/471 [3:19:02<3:36:43, 54.41s/it]

24696


 49%|████▉     | 233/471 [3:19:58<3:36:51, 54.67s/it]

24415


 50%|████▉     | 234/471 [3:20:52<3:35:51, 54.65s/it]

24509


 50%|████▉     | 235/471 [3:21:47<3:35:20, 54.75s/it]

24728


 50%|█████     | 236/471 [3:22:42<3:34:33, 54.78s/it]

24702


 50%|█████     | 237/471 [3:23:37<3:33:27, 54.73s/it]

24500


 51%|█████     | 238/471 [3:24:31<3:32:19, 54.67s/it]

24344


 51%|█████     | 239/471 [3:25:26<3:31:24, 54.68s/it]

24479


 51%|█████     | 240/471 [3:26:20<3:30:17, 54.62s/it]

24614


 51%|█████     | 241/471 [3:27:15<3:29:17, 54.60s/it]

24612


 51%|█████▏    | 242/471 [3:28:10<3:28:26, 54.61s/it]

24497


 52%|█████▏    | 243/471 [3:29:04<3:27:40, 54.65s/it]

24670


 52%|█████▏    | 244/471 [3:29:59<3:26:48, 54.66s/it]

24622


 52%|█████▏    | 245/471 [3:30:53<3:25:40, 54.61s/it]

24750


 52%|█████▏    | 246/471 [3:31:49<3:25:29, 54.80s/it]

24488


 52%|█████▏    | 247/471 [3:32:43<3:24:05, 54.67s/it]

24755


 53%|█████▎    | 248/471 [3:33:37<3:22:46, 54.56s/it]

24689


 53%|█████▎    | 249/471 [3:34:32<3:22:04, 54.61s/it]

24621


 53%|█████▎    | 250/471 [3:35:27<3:21:05, 54.59s/it]

24738


 53%|█████▎    | 251/471 [3:36:21<3:20:14, 54.61s/it]

24508


 54%|█████▎    | 252/471 [3:37:16<3:19:20, 54.62s/it]

24552


 54%|█████▎    | 253/471 [3:38:11<3:18:29, 54.63s/it]

24722


 54%|█████▍    | 255/471 [3:39:05<2:31:22, 42.05s/it]

25024


 55%|█████▍    | 257/471 [3:39:59<2:08:41, 36.08s/it]

24898


 55%|█████▍    | 258/471 [3:40:54<2:22:17, 40.08s/it]

24770


 55%|█████▍    | 259/471 [3:41:49<2:34:13, 43.65s/it]

24884


 55%|█████▌    | 260/471 [3:42:43<2:42:58, 46.35s/it]

24763


 56%|█████▌    | 262/471 [3:43:37<2:13:03, 38.20s/it]

24870


 56%|█████▌    | 263/471 [3:44:31<2:25:12, 41.89s/it]

24918


 56%|█████▌    | 264/471 [3:45:25<2:35:02, 44.94s/it]

24979


 56%|█████▋    | 265/471 [3:46:20<2:42:47, 47.42s/it]

24848


 56%|█████▋    | 266/471 [3:47:14<2:48:28, 49.31s/it]

24880


 57%|█████▋    | 267/471 [3:48:08<2:52:15, 50.67s/it]

24791


 57%|█████▋    | 268/471 [3:49:02<2:54:59, 51.72s/it]

24841


 57%|█████▋    | 269/471 [3:49:56<2:56:18, 52.37s/it]

24790


 57%|█████▋    | 270/471 [3:50:51<2:57:21, 52.94s/it]

24864


 58%|█████▊    | 271/471 [3:51:45<2:57:48, 53.34s/it]

24789


 58%|█████▊    | 273/471 [3:52:39<2:16:23, 41.33s/it]

24891


 58%|█████▊    | 275/471 [3:53:33<1:56:37, 35.70s/it]

24917


 59%|█████▊    | 276/471 [3:54:28<2:09:53, 39.97s/it]

24915


 59%|█████▉    | 277/471 [3:55:23<2:20:33, 43.47s/it]

24932


 59%|█████▉    | 278/471 [3:56:17<2:28:45, 46.25s/it]

25036


 59%|█████▉    | 279/471 [3:57:12<2:34:43, 48.35s/it]

24905


 59%|█████▉    | 280/471 [3:58:06<2:39:02, 49.96s/it]

25022


 60%|█████▉    | 281/471 [3:59:00<2:42:15, 51.24s/it]

24824


 60%|█████▉    | 282/471 [3:59:55<2:44:46, 52.31s/it]

24867


 60%|██████    | 283/471 [4:00:50<2:45:57, 52.97s/it]

24893


 60%|██████    | 284/471 [4:01:44<2:46:25, 53.40s/it]

25315


 61%|██████    | 285/471 [4:02:38<2:45:44, 53.47s/it]

25552


 61%|██████    | 286/471 [4:03:31<2:44:49, 53.46s/it]

25299


 61%|██████    | 287/471 [4:04:25<2:44:05, 53.51s/it]

25197


 61%|██████    | 288/471 [4:05:19<2:43:38, 53.65s/it]

25526


 61%|██████▏   | 289/471 [4:06:13<2:42:57, 53.72s/it]

25180


 62%|██████▏   | 290/471 [4:07:06<2:41:58, 53.69s/it]

25079


 62%|██████▏   | 291/471 [4:08:00<2:41:02, 53.68s/it]

25135


 62%|██████▏   | 293/471 [4:08:53<2:02:25, 41.27s/it]

25184


 62%|██████▏   | 294/471 [4:09:48<2:11:12, 44.48s/it]

25231


 63%|██████▎   | 295/471 [4:10:41<2:17:31, 46.88s/it]

25435


 63%|██████▎   | 297/471 [4:11:36<1:51:32, 38.46s/it]

25572


 63%|██████▎   | 298/471 [4:12:30<2:01:30, 42.14s/it]

25300


 63%|██████▎   | 299/471 [4:13:24<2:09:03, 45.02s/it]

25122


 64%|██████▎   | 300/471 [4:14:17<2:14:43, 47.27s/it]

25322


 64%|██████▍   | 302/471 [4:15:11<1:48:16, 38.44s/it]

25250


 64%|██████▍   | 303/471 [4:16:04<1:57:29, 41.96s/it]

25133


 65%|██████▍   | 304/471 [4:16:57<2:04:46, 44.83s/it]

25285


 65%|██████▍   | 305/471 [4:17:51<2:10:12, 47.07s/it]

25177


 65%|██████▍   | 306/471 [4:18:45<2:14:39, 48.96s/it]

25408


 65%|██████▌   | 307/471 [4:19:38<2:17:28, 50.29s/it]

25141


 66%|██████▌   | 310/471 [4:20:33<1:27:45, 32.71s/it]

25207


 66%|██████▌   | 311/471 [4:21:26<1:38:47, 37.05s/it]

25520


 66%|██████▌   | 312/471 [4:22:20<1:48:20, 40.88s/it]

25144


 66%|██████▋   | 313/471 [4:23:14<1:55:50, 43.99s/it]

25414


 67%|██████▋   | 314/471 [4:24:07<2:01:27, 46.42s/it]

25573


 67%|██████▋   | 315/471 [4:25:01<2:05:35, 48.30s/it]

25560


 67%|██████▋   | 316/471 [4:25:54<2:08:22, 49.69s/it]

25508


 67%|██████▋   | 317/471 [4:26:47<2:10:20, 50.78s/it]

25279


 68%|██████▊   | 318/471 [4:27:41<2:11:31, 51.58s/it]

25200


 68%|██████▊   | 319/471 [4:28:35<2:12:28, 52.29s/it]

25121


 68%|██████▊   | 320/471 [4:29:28<2:12:25, 52.62s/it]

25325


 68%|██████▊   | 321/471 [4:30:22<2:12:24, 52.96s/it]

25333


 68%|██████▊   | 322/471 [4:31:16<2:11:58, 53.15s/it]

25162


 69%|██████▊   | 323/471 [4:32:09<2:11:17, 53.23s/it]

25264


 69%|██████▉   | 324/471 [4:33:03<2:10:34, 53.29s/it]

25331


 69%|██████▉   | 325/471 [4:33:56<2:09:45, 53.33s/it]

25349


 69%|██████▉   | 326/471 [4:34:49<2:08:50, 53.32s/it]

25418


 69%|██████▉   | 327/471 [4:35:43<2:07:59, 53.33s/it]

25130


 70%|██████▉   | 328/471 [4:36:36<2:07:09, 53.35s/it]

25701


 70%|██████▉   | 329/471 [4:37:29<2:06:04, 53.27s/it]

25856


 70%|███████   | 330/471 [4:38:22<2:05:00, 53.20s/it]

25619


 70%|███████   | 332/471 [4:39:16<1:34:53, 40.96s/it]

25710


 71%|███████   | 333/471 [4:40:08<1:40:59, 43.91s/it]

25632


 71%|███████   | 334/471 [4:41:01<1:45:34, 46.24s/it]

25631


 71%|███████   | 335/471 [4:41:54<1:49:06, 48.14s/it]

25847


 71%|███████▏  | 336/471 [4:42:48<1:51:36, 49.60s/it]

25610


 72%|███████▏  | 337/471 [4:43:41<1:52:59, 50.59s/it]

25605


 72%|███████▏  | 338/471 [4:44:34<1:53:32, 51.22s/it]

25855


 72%|███████▏  | 339/471 [4:45:26<1:53:38, 51.66s/it]

25614


 72%|███████▏  | 340/471 [4:46:19<1:53:41, 52.07s/it]

25809


 72%|███████▏  | 341/471 [4:47:13<1:53:38, 52.45s/it]

25766


 73%|███████▎  | 343/471 [4:48:06<1:26:29, 40.55s/it]

25674


 73%|███████▎  | 344/471 [4:48:59<1:32:23, 43.65s/it]

25590


 74%|███████▎  | 347/471 [4:49:52<1:02:39, 30.32s/it]

25705


 74%|███████▍  | 348/471 [4:50:45<1:11:16, 34.77s/it]

25787


 74%|███████▍  | 350/471 [4:51:38<1:04:04, 31.77s/it]

25833


 75%|███████▍  | 351/471 [4:52:31<1:12:16, 36.13s/it]

25857


 75%|███████▍  | 352/471 [4:53:24<1:19:12, 39.94s/it]

25630


 75%|███████▍  | 353/471 [4:54:17<1:24:50, 43.14s/it]

25616


 75%|███████▌  | 354/471 [4:55:10<1:28:57, 45.62s/it]

25773


 75%|███████▌  | 355/471 [4:56:03<1:32:03, 47.62s/it]

25613


 76%|███████▌  | 356/471 [4:56:56<1:34:25, 49.26s/it]

25589


 76%|███████▌  | 358/471 [4:57:49<1:13:46, 39.17s/it]

25931


 76%|███████▋  | 360/471 [4:58:42<1:03:09, 34.14s/it]

26159


 77%|███████▋  | 361/471 [4:59:34<1:10:03, 38.21s/it]

26331


 77%|███████▋  | 362/471 [5:00:27<1:15:37, 41.63s/it]

26136


 77%|███████▋  | 363/471 [5:01:20<1:19:53, 44.39s/it]

26038


 77%|███████▋  | 365/471 [5:02:12<1:04:52, 36.73s/it]

26067


 78%|███████▊  | 367/471 [5:03:04<56:48, 32.77s/it]  

26074


 78%|███████▊  | 369/471 [5:03:57<51:52, 30.52s/it]

26284


 79%|███████▊  | 370/471 [5:04:49<58:43, 34.88s/it]

26291


 79%|███████▉  | 371/471 [5:05:42<1:04:35, 38.75s/it]

26002


 79%|███████▉  | 372/471 [5:06:34<1:09:33, 42.16s/it]

25940


 79%|███████▉  | 373/471 [5:07:27<1:13:09, 44.79s/it]

25957


 79%|███████▉  | 374/471 [5:08:19<1:15:42, 46.83s/it]

26012


 80%|███████▉  | 375/471 [5:09:12<1:17:22, 48.36s/it]

26178


 80%|███████▉  | 376/471 [5:10:04<1:18:17, 49.45s/it]

26194


 80%|████████  | 377/471 [5:10:56<1:18:46, 50.28s/it]

26163


 80%|████████  | 378/471 [5:11:49<1:18:56, 50.93s/it]

26240


 80%|████████  | 379/471 [5:12:41<1:18:41, 51.32s/it]

26317


 81%|████████  | 380/471 [5:13:33<1:18:20, 51.65s/it]

26095


 81%|████████  | 382/471 [5:14:26<59:16, 39.97s/it]  

26021


 81%|████████▏ | 383/471 [5:15:18<1:03:06, 43.03s/it]

26024


 82%|████████▏ | 384/471 [5:16:10<1:05:44, 45.34s/it]

25860


 82%|████████▏ | 385/471 [5:17:02<1:07:35, 47.16s/it]

26129


 82%|████████▏ | 386/471 [5:17:55<1:09:03, 48.74s/it]

26027


 83%|████████▎ | 389/471 [5:18:47<43:19, 31.70s/it]  

26537


 83%|████████▎ | 390/471 [5:19:39<48:25, 35.87s/it]

26589


 83%|████████▎ | 392/471 [5:20:31<42:20, 32.15s/it]

26545


 83%|████████▎ | 393/471 [5:21:23<47:07, 36.25s/it]

26394


 84%|████████▎ | 394/471 [5:22:15<51:08, 39.85s/it]

26526


 84%|████████▍ | 395/471 [5:23:06<54:14, 42.83s/it]

26437


 84%|████████▍ | 397/471 [5:23:58<44:11, 35.83s/it]

26411


 85%|████████▍ | 398/471 [5:24:51<48:11, 39.61s/it]

26560


 85%|████████▍ | 399/471 [5:25:42<51:06, 42.59s/it]

26601


 85%|████████▍ | 400/471 [5:26:34<53:13, 44.98s/it]

26593


 85%|████████▌ | 401/471 [5:27:26<54:38, 46.84s/it]

26532


 85%|████████▌ | 402/471 [5:28:18<55:25, 48.19s/it]

26491


 86%|████████▌ | 405/471 [5:29:10<34:30, 31.36s/it]

26514


 86%|████████▌ | 406/471 [5:30:01<38:31, 35.56s/it]

26749


 86%|████████▋ | 407/471 [5:30:53<41:44, 39.13s/it]

26662


 87%|████████▋ | 408/471 [5:31:44<44:08, 42.04s/it]

11427


 87%|████████▋ | 409/471 [5:32:35<45:54, 44.42s/it]

26685


 87%|████████▋ | 410/471 [5:33:26<46:58, 46.21s/it]

11387


 87%|████████▋ | 412/471 [5:34:17<36:37, 37.25s/it]

11290


 88%|████████▊ | 413/471 [5:35:08<39:14, 40.60s/it]

11314


 88%|████████▊ | 414/471 [5:36:00<41:08, 43.30s/it]

11206


 88%|████████▊ | 415/471 [5:36:51<42:23, 45.42s/it]

26738


 88%|████████▊ | 416/471 [5:37:43<43:15, 47.19s/it]

11378


 89%|████████▊ | 417/471 [5:38:34<43:28, 48.31s/it]

11219


 89%|████████▊ | 418/471 [5:39:25<43:26, 49.18s/it]

11216


 89%|████████▉ | 420/471 [5:40:17<32:50, 38.64s/it]

11224


 89%|████████▉ | 421/471 [5:41:08<34:47, 41.76s/it]

26649


 90%|████████▉ | 422/471 [5:42:00<36:04, 44.18s/it]

11325


 90%|████████▉ | 423/471 [5:42:51<36:50, 46.05s/it]

11283


 90%|█████████ | 424/471 [5:43:42<37:17, 47.61s/it]

11279


 91%|█████████ | 428/471 [5:44:33<18:48, 26.24s/it]

11388


 91%|█████████ | 429/471 [5:45:25<21:31, 30.75s/it]

11447


 91%|█████████▏| 430/471 [5:46:16<23:48, 34.85s/it]

11521


 92%|█████████▏| 431/471 [5:47:07<25:39, 38.50s/it]

11570


 92%|█████████▏| 432/471 [5:47:57<26:57, 41.49s/it]

11797


 92%|█████████▏| 433/471 [5:48:48<27:46, 43.85s/it]

11854


 92%|█████████▏| 434/471 [5:49:39<28:11, 45.72s/it]

11857


 92%|█████████▏| 435/471 [5:50:30<28:17, 47.15s/it]

11728


 93%|█████████▎| 436/471 [5:51:20<28:04, 48.14s/it]

11867


 93%|█████████▎| 437/471 [5:52:11<27:41, 48.86s/it]

11835


 93%|█████████▎| 438/471 [5:53:02<27:14, 49.52s/it]

11625


 93%|█████████▎| 440/471 [5:53:53<19:55, 38.56s/it]

11891


 94%|█████████▎| 441/471 [5:54:44<20:46, 41.55s/it]

11742


 94%|█████████▍| 442/471 [5:55:35<21:16, 44.01s/it]

11890


 94%|█████████▍| 443/471 [5:56:25<21:22, 45.82s/it]

11704


 94%|█████████▍| 444/471 [5:57:16<21:13, 47.18s/it]

11712


 94%|█████████▍| 445/471 [5:58:07<20:54, 48.26s/it]

11882


 95%|█████████▍| 446/471 [5:58:58<20:25, 49.03s/it]

11608


 95%|█████████▍| 447/471 [5:59:49<19:47, 49.49s/it]

11525


 95%|█████████▌| 448/471 [6:00:39<19:06, 49.84s/it]

11720


 95%|█████████▌| 449/471 [6:01:30<18:22, 50.10s/it]

11451


 96%|█████████▌| 451/471 [6:02:21<12:57, 38.87s/it]

12110


 96%|█████████▌| 452/471 [6:03:11<13:11, 41.67s/it]

12106


 96%|█████████▌| 453/471 [6:04:02<13:11, 43.97s/it]

12239


 96%|█████████▋| 454/471 [6:04:52<12:56, 45.69s/it]

11910


 97%|█████████▋| 455/471 [6:05:43<12:32, 47.01s/it]

12056


 97%|█████████▋| 456/471 [6:06:33<12:00, 48.04s/it]

12191


 97%|█████████▋| 457/471 [6:07:23<11:21, 48.67s/it]

12082


 97%|█████████▋| 458/471 [6:08:13<10:37, 49.06s/it]

12075


 97%|█████████▋| 459/471 [6:09:04<09:52, 49.37s/it]

12256


 98%|█████████▊| 460/471 [6:09:53<09:04, 49.52s/it]

12226


 98%|█████████▊| 461/471 [6:10:43<08:16, 49.60s/it]

12228


 98%|█████████▊| 462/471 [6:11:34<07:28, 49.83s/it]

12078


 98%|█████████▊| 463/471 [6:12:24<06:39, 49.95s/it]

11985


 99%|█████████▊| 464/471 [6:13:14<05:49, 50.00s/it]

12192


 99%|█████████▊| 465/471 [6:14:05<05:01, 50.20s/it]

12087


 99%|█████████▉| 466/471 [6:14:54<04:10, 50.11s/it]

12042


 99%|█████████▉| 467/471 [6:15:45<03:20, 50.16s/it]

12029


100%|█████████▉| 469/471 [6:16:35<01:17, 38.54s/it]

12074


100%|██████████| 471/471 [6:17:25<00:00, 48.08s/it]


In [92]:
pickle.dump(result_dict_story_gen_just_tfidf_novel_method, open('25_rresult_dict_story_gen_just_tfidf_novel_method_v128.p', 'wb'))

In [95]:
# embedding based model
seen_doc = set()
result_dict_story_gen_just_emb_novel_method = dict()
for se in tqdm(seed_documents_for_experiments):
    if se in seen_doc:
        continue
    print(se)
    res = story_gen_just_emb_novel_method(se, collect_ids, min_acceptable_similarity= 0.94) 
    result_dict_story_gen_just_emb_novel_method[se] = res
    for d in res:
        seen_doc.add(d)

  0%|          | 0/471 [00:00<?, ?it/s]

22989


  0%|          | 1/471 [00:06<47:18,  6.04s/it]

23079


  0%|          | 2/471 [00:12<47:09,  6.03s/it]

23002


  1%|          | 3/471 [00:18<46:58,  6.02s/it]

23124


  1%|          | 4/471 [00:24<46:53,  6.02s/it]

23131


  1%|          | 5/471 [00:30<46:48,  6.03s/it]

23119


  1%|▏         | 6/471 [00:36<46:44,  6.03s/it]

23192


  1%|▏         | 7/471 [00:42<46:42,  6.04s/it]

23066


  2%|▏         | 8/471 [00:48<46:38,  6.04s/it]

22988


  2%|▏         | 9/471 [00:54<46:30,  6.04s/it]

23159


  2%|▏         | 10/471 [01:00<46:20,  6.03s/it]

23107


  2%|▏         | 11/471 [01:06<46:08,  6.02s/it]

23022


  3%|▎         | 12/471 [01:12<45:58,  6.01s/it]

23045


  3%|▎         | 13/471 [01:18<45:50,  6.01s/it]

23110


  3%|▎         | 14/471 [01:24<45:53,  6.03s/it]

23084


  3%|▎         | 15/471 [01:30<45:53,  6.04s/it]

23017


  3%|▎         | 16/471 [01:36<45:50,  6.04s/it]

23036


  4%|▎         | 17/471 [01:42<45:45,  6.05s/it]

23023


  4%|▍         | 18/471 [01:48<45:39,  6.05s/it]

23005


  4%|▍         | 19/471 [01:54<45:36,  6.05s/it]

23123


  4%|▍         | 20/471 [02:00<45:32,  6.06s/it]

23061


  4%|▍         | 21/471 [02:06<45:26,  6.06s/it]

23121


  5%|▍         | 22/471 [02:12<45:17,  6.05s/it]

23073


  5%|▍         | 23/471 [02:18<45:04,  6.04s/it]

23020


  5%|▌         | 24/471 [02:24<44:55,  6.03s/it]

23163


  5%|▌         | 25/471 [02:30<44:46,  6.02s/it]

23177


  6%|▌         | 26/471 [02:36<44:40,  6.02s/it]

23077


  6%|▌         | 27/471 [02:42<44:35,  6.03s/it]

23130


  6%|▌         | 28/471 [02:48<44:34,  6.04s/it]

23188


  6%|▌         | 29/471 [02:55<44:27,  6.04s/it]

23054


  6%|▋         | 30/471 [03:01<44:18,  6.03s/it]

22990


  7%|▋         | 31/471 [03:07<44:10,  6.02s/it]

23189


  7%|▋         | 32/471 [03:13<44:04,  6.02s/it]

23115


  7%|▋         | 33/471 [03:19<43:59,  6.03s/it]

23040


  7%|▋         | 34/471 [03:25<43:53,  6.03s/it]

23013


  7%|▋         | 35/471 [03:31<43:52,  6.04s/it]

23185


  8%|▊         | 36/471 [03:37<43:44,  6.03s/it]

23126


  8%|▊         | 37/471 [03:43<43:38,  6.03s/it]

22997


  8%|▊         | 38/471 [03:49<43:30,  6.03s/it]

23060


  8%|▊         | 39/471 [03:55<43:25,  6.03s/it]

23041


  8%|▊         | 40/471 [04:01<43:19,  6.03s/it]

22986


  9%|▊         | 41/471 [04:07<43:13,  6.03s/it]

23004


  9%|▉         | 42/471 [04:13<43:06,  6.03s/it]

23137


  9%|▉         | 43/471 [04:19<43:00,  6.03s/it]

23057


  9%|▉         | 44/471 [04:25<42:58,  6.04s/it]

23136


 10%|▉         | 45/471 [04:31<42:51,  6.04s/it]

23068


 10%|▉         | 46/471 [04:37<42:44,  6.03s/it]

23064


 10%|▉         | 47/471 [04:43<42:39,  6.04s/it]

23146


 10%|█         | 48/471 [04:49<42:37,  6.05s/it]

23166


 10%|█         | 49/471 [04:55<42:30,  6.04s/it]

23055


 11%|█         | 50/471 [05:01<42:24,  6.04s/it]

23026


 11%|█         | 51/471 [05:07<42:18,  6.04s/it]

23018


 11%|█         | 52/471 [05:13<42:12,  6.04s/it]

22996


 11%|█▏        | 53/471 [05:19<42:05,  6.04s/it]

22998


 11%|█▏        | 54/471 [05:25<41:59,  6.04s/it]

23006


 12%|█▏        | 55/471 [05:31<41:54,  6.04s/it]

23167


 12%|█▏        | 56/471 [05:37<41:46,  6.04s/it]

22983


 12%|█▏        | 57/471 [05:44<41:38,  6.04s/it]

23080


 12%|█▏        | 58/471 [05:50<41:28,  6.03s/it]

23031


 13%|█▎        | 59/471 [05:56<41:20,  6.02s/it]

23035


 13%|█▎        | 60/471 [06:02<41:13,  6.02s/it]

22994


 13%|█▎        | 61/471 [06:08<41:09,  6.02s/it]

23147


 13%|█▎        | 62/471 [06:14<41:01,  6.02s/it]

23122


 13%|█▎        | 63/471 [06:20<40:53,  6.01s/it]

23104


 14%|█▎        | 64/471 [06:26<40:50,  6.02s/it]

23565


 14%|█▍        | 66/471 [06:32<31:09,  4.62s/it]

23489


 14%|█▍        | 67/471 [06:38<33:19,  4.95s/it]

23312


 14%|█▍        | 68/471 [06:44<35:03,  5.22s/it]

23344


 15%|█▍        | 70/471 [06:49<28:29,  4.26s/it]

23461


 15%|█▌        | 71/471 [06:55<31:04,  4.66s/it]

23331


 15%|█▌        | 72/471 [07:01<33:09,  4.99s/it]

23197


 15%|█▌        | 73/471 [07:07<34:45,  5.24s/it]

23298


 16%|█▌        | 74/471 [07:13<35:59,  5.44s/it]

23551


 16%|█▌        | 75/471 [07:19<36:52,  5.59s/it]

23510


 16%|█▌        | 76/471 [07:25<37:30,  5.70s/it]

23481


 16%|█▋        | 77/471 [07:31<37:55,  5.78s/it]

23474


 17%|█▋        | 80/471 [07:37<23:56,  3.67s/it]

23553


 17%|█▋        | 81/471 [07:43<26:59,  4.15s/it]

23431


 17%|█▋        | 82/471 [07:49<29:37,  4.57s/it]

23359


 18%|█▊        | 83/471 [07:55<31:47,  4.92s/it]

23206


 18%|█▊        | 84/471 [08:01<33:27,  5.19s/it]

23201


 18%|█▊        | 85/471 [08:07<34:44,  5.40s/it]

23395


 18%|█▊        | 86/471 [08:13<35:38,  5.56s/it]

23291


 18%|█▊        | 87/471 [08:19<36:17,  5.67s/it]

23569


 19%|█▊        | 88/471 [08:25<36:44,  5.76s/it]

23624


 19%|█▉        | 89/471 [08:31<36:59,  5.81s/it]

23511


 19%|█▉        | 90/471 [08:37<37:08,  5.85s/it]

23296


 19%|█▉        | 91/471 [08:43<37:15,  5.88s/it]

23397


 20%|█▉        | 92/471 [08:49<37:20,  5.91s/it]

23248


 20%|█▉        | 93/471 [08:55<37:21,  5.93s/it]

23570


 20%|█▉        | 94/471 [09:01<37:18,  5.94s/it]

23442


 20%|██        | 95/471 [09:07<37:13,  5.94s/it]

23460


 20%|██        | 96/471 [09:13<37:09,  5.95s/it]

23193


 21%|██        | 97/471 [09:18<37:02,  5.94s/it]

23608


 21%|██        | 98/471 [09:24<37:00,  5.95s/it]

23199


 21%|██        | 99/471 [09:30<36:53,  5.95s/it]

23550


 21%|██        | 100/471 [09:36<36:48,  5.95s/it]

23417


 21%|██▏       | 101/471 [09:42<36:41,  5.95s/it]

23266


 22%|██▏       | 102/471 [09:48<36:34,  5.95s/it]

23480


 22%|██▏       | 103/471 [09:54<36:27,  5.95s/it]

23576


 22%|██▏       | 104/471 [10:00<36:22,  5.95s/it]

23613


 22%|██▏       | 105/471 [10:06<36:17,  5.95s/it]

23581


 23%|██▎       | 106/471 [10:12<36:12,  5.95s/it]

23313


 23%|██▎       | 107/471 [10:18<36:05,  5.95s/it]

23618


 23%|██▎       | 108/471 [10:24<35:58,  5.95s/it]

23428


 23%|██▎       | 109/471 [10:30<35:51,  5.94s/it]

23383


 23%|██▎       | 110/471 [10:36<35:44,  5.94s/it]

23458


 24%|██▎       | 111/471 [10:42<35:38,  5.94s/it]

23401


 24%|██▍       | 112/471 [10:48<35:33,  5.94s/it]

23216


 24%|██▍       | 113/471 [10:54<35:27,  5.94s/it]

23468


 24%|██▍       | 114/471 [11:00<35:21,  5.94s/it]

23260


 24%|██▍       | 115/471 [11:06<35:14,  5.94s/it]

23443


 25%|██▍       | 116/471 [11:11<35:08,  5.94s/it]

23541


 25%|██▍       | 117/471 [11:17<35:02,  5.94s/it]

23282


 25%|██▌       | 118/471 [11:23<34:57,  5.94s/it]

23487


 25%|██▌       | 119/471 [11:29<34:50,  5.94s/it]

23247


 25%|██▌       | 120/471 [11:35<34:44,  5.94s/it]

23252


 26%|██▌       | 121/471 [11:41<34:39,  5.94s/it]

23445


 26%|██▌       | 122/471 [11:47<34:33,  5.94s/it]

23337


 26%|██▌       | 123/471 [11:53<34:28,  5.94s/it]

23210


 26%|██▋       | 124/471 [11:59<34:21,  5.94s/it]

23603


 27%|██▋       | 125/471 [12:05<34:17,  5.95s/it]

23749


 27%|██▋       | 126/471 [12:11<34:05,  5.93s/it]

23723


 27%|██▋       | 127/471 [12:17<33:54,  5.91s/it]

23873


 27%|██▋       | 128/471 [12:23<33:44,  5.90s/it]

23776


 27%|██▋       | 129/471 [12:28<33:37,  5.90s/it]

23874


 28%|██▊       | 130/471 [12:34<33:29,  5.89s/it]

23899


 28%|██▊       | 132/471 [12:40<25:36,  4.53s/it]

23887


 28%|██▊       | 133/471 [12:46<27:28,  4.88s/it]

23837


 28%|██▊       | 134/471 [12:52<28:51,  5.14s/it]

23854


 29%|██▊       | 135/471 [12:58<29:55,  5.34s/it]

23756


 29%|██▉       | 136/471 [13:04<30:42,  5.50s/it]

23810


 29%|██▉       | 137/471 [13:10<31:15,  5.61s/it]

23688


 29%|██▉       | 138/471 [13:16<31:37,  5.70s/it]

23902


 30%|██▉       | 139/471 [13:22<31:52,  5.76s/it]

23845


 30%|██▉       | 140/471 [13:27<31:59,  5.80s/it]

23863


 30%|██▉       | 141/471 [13:33<32:04,  5.83s/it]

23721


 30%|███       | 142/471 [13:39<32:05,  5.85s/it]

23859


 30%|███       | 143/471 [13:45<32:03,  5.87s/it]

23841


 31%|███       | 144/471 [13:51<32:00,  5.87s/it]

23625


 31%|███       | 145/471 [13:57<31:57,  5.88s/it]

23833


 31%|███       | 146/471 [14:03<31:54,  5.89s/it]

23862


 31%|███       | 147/471 [14:09<31:47,  5.89s/it]

23634


 31%|███▏      | 148/471 [14:15<31:41,  5.89s/it]

23827


 32%|███▏      | 149/471 [14:21<31:35,  5.89s/it]

23631


 32%|███▏      | 150/471 [14:26<31:30,  5.89s/it]

23916


 32%|███▏      | 151/471 [14:32<31:23,  5.89s/it]

23895


 32%|███▏      | 152/471 [14:38<31:17,  5.88s/it]

23733


 32%|███▏      | 153/471 [14:44<31:12,  5.89s/it]

23628


 33%|███▎      | 154/471 [14:50<31:04,  5.88s/it]

23788


 33%|███▎      | 155/471 [14:56<30:58,  5.88s/it]

23901


 33%|███▎      | 156/471 [15:02<30:53,  5.88s/it]

23638


 33%|███▎      | 157/471 [15:08<30:46,  5.88s/it]

23826


 34%|███▎      | 158/471 [15:13<30:41,  5.88s/it]

23643


 34%|███▍      | 159/471 [15:19<30:36,  5.88s/it]

23705


 34%|███▍      | 160/471 [15:25<30:30,  5.89s/it]

23875


 34%|███▍      | 161/471 [15:31<30:25,  5.89s/it]

23831


 34%|███▍      | 162/471 [15:37<30:19,  5.89s/it]

23809


 35%|███▍      | 163/471 [15:43<30:14,  5.89s/it]

23871


 35%|███▍      | 164/471 [15:49<30:07,  5.89s/it]

23828


 35%|███▌      | 165/471 [15:55<30:00,  5.88s/it]

23794


 35%|███▌      | 166/471 [16:01<29:56,  5.89s/it]

23777


 35%|███▌      | 167/471 [16:06<29:50,  5.89s/it]

23735


 36%|███▌      | 168/471 [16:12<29:42,  5.88s/it]

23896


 36%|███▌      | 169/471 [16:18<29:36,  5.88s/it]

23714


 36%|███▌      | 170/471 [16:24<29:31,  5.88s/it]

23829


 36%|███▋      | 171/471 [16:30<29:26,  5.89s/it]

24089


 37%|███▋      | 172/471 [16:36<29:15,  5.87s/it]

24075


 37%|███▋      | 174/471 [16:42<22:19,  4.51s/it]

23941


 37%|███▋      | 175/471 [16:48<23:51,  4.84s/it]

24061


 38%|███▊      | 177/471 [16:53<19:46,  4.04s/it]

24193


 38%|███▊      | 178/471 [16:59<21:42,  4.44s/it]

24325


 38%|███▊      | 180/471 [17:05<18:35,  3.83s/it]

24303


 38%|███▊      | 181/471 [17:11<20:38,  4.27s/it]

24239


 39%|███▊      | 182/471 [17:17<22:20,  4.64s/it]

24148


 39%|███▉      | 183/471 [17:22<23:43,  4.94s/it]

24240


 39%|███▉      | 184/471 [17:28<24:45,  5.18s/it]

24013


 39%|███▉      | 185/471 [17:34<25:31,  5.35s/it]

24042


 39%|███▉      | 186/471 [17:40<26:06,  5.50s/it]

24329


 40%|███▉      | 187/471 [17:46<26:27,  5.59s/it]

24194


 40%|███▉      | 188/471 [17:52<26:41,  5.66s/it]

24268


 40%|████      | 189/471 [17:57<26:50,  5.71s/it]

24005


 40%|████      | 190/471 [18:03<27:01,  5.77s/it]

24132


 41%|████      | 191/471 [18:09<27:03,  5.80s/it]

24316


 41%|████      | 192/471 [18:15<27:01,  5.81s/it]

24153


 41%|████      | 193/471 [18:21<26:58,  5.82s/it]

24146


 41%|████      | 194/471 [18:27<26:55,  5.83s/it]

24338


 41%|████▏     | 195/471 [18:33<26:50,  5.83s/it]

24269


 42%|████▏     | 196/471 [18:38<26:45,  5.84s/it]

23990


 42%|████▏     | 197/471 [18:44<26:40,  5.84s/it]

23952


 42%|████▏     | 198/471 [18:50<26:36,  5.85s/it]

24180


 42%|████▏     | 199/471 [18:56<26:29,  5.84s/it]

23954


 43%|████▎     | 201/471 [19:02<20:13,  4.49s/it]

24067


 43%|████▎     | 202/471 [19:08<21:37,  4.82s/it]

24190


 43%|████▎     | 203/471 [19:13<22:43,  5.09s/it]

24137


 43%|████▎     | 204/471 [19:19<23:32,  5.29s/it]

24271


 44%|████▎     | 205/471 [19:25<24:08,  5.44s/it]

24313


 44%|████▎     | 206/471 [19:31<24:32,  5.56s/it]

24158


 44%|████▍     | 207/471 [19:37<24:46,  5.63s/it]

23951


 44%|████▍     | 208/471 [19:43<24:54,  5.68s/it]

23975


 45%|████▍     | 210/471 [19:48<19:13,  4.42s/it]

24169


 45%|████▍     | 211/471 [19:54<20:38,  4.76s/it]

24150


 45%|████▌     | 212/471 [20:00<21:45,  5.04s/it]

23969


 45%|████▌     | 213/471 [20:06<22:34,  5.25s/it]

24000


 45%|████▌     | 214/471 [20:12<23:10,  5.41s/it]

24633


 46%|████▌     | 215/471 [20:17<23:29,  5.51s/it]

24412


 46%|████▌     | 216/471 [20:23<23:43,  5.58s/it]

24610


 46%|████▌     | 217/471 [20:29<23:50,  5.63s/it]

24608


 46%|████▋     | 219/471 [20:35<18:23,  4.38s/it]

24734


 47%|████▋     | 220/471 [20:40<19:44,  4.72s/it]

24729


 47%|████▋     | 221/471 [20:46<20:47,  4.99s/it]

24376


 47%|████▋     | 222/471 [20:52<21:36,  5.21s/it]

24446


 47%|████▋     | 223/471 [20:58<22:10,  5.37s/it]

24747


 48%|████▊     | 224/471 [21:04<22:33,  5.48s/it]

24352


 48%|████▊     | 225/471 [21:09<22:49,  5.57s/it]

24687


 48%|████▊     | 226/471 [21:15<22:58,  5.63s/it]

24396


 48%|████▊     | 227/471 [21:21<23:04,  5.67s/it]

24374


 48%|████▊     | 228/471 [21:27<23:05,  5.70s/it]

24361


 49%|████▊     | 229/471 [21:32<23:04,  5.72s/it]

24450


 49%|████▉     | 230/471 [21:38<23:03,  5.74s/it]

24419


 49%|████▉     | 231/471 [21:44<22:59,  5.75s/it]

24696


 49%|████▉     | 233/471 [21:50<17:34,  4.43s/it]

24415


 50%|████▉     | 234/471 [21:56<18:49,  4.77s/it]

24509


 50%|████▉     | 235/471 [22:01<19:47,  5.03s/it]

24728


 50%|█████     | 236/471 [22:07<20:30,  5.24s/it]

24702


 50%|█████     | 237/471 [22:13<21:01,  5.39s/it]

24344


 51%|█████     | 239/471 [22:19<16:33,  4.28s/it]

24479


 51%|█████     | 240/471 [22:24<17:52,  4.64s/it]

24612


 51%|█████▏    | 242/471 [22:30<14:59,  3.93s/it]

24497


 52%|█████▏    | 243/471 [22:36<16:30,  4.34s/it]

24670


 52%|█████▏    | 244/471 [22:42<17:44,  4.69s/it]

24622


 52%|█████▏    | 245/471 [22:48<18:41,  4.96s/it]

24750


 52%|█████▏    | 246/471 [22:53<19:26,  5.18s/it]

24488


 52%|█████▏    | 247/471 [22:59<19:57,  5.34s/it]

24755


 53%|█████▎    | 248/471 [23:05<20:17,  5.46s/it]

24689


 53%|█████▎    | 249/471 [23:11<20:31,  5.55s/it]

24621


 53%|█████▎    | 250/471 [23:16<20:40,  5.61s/it]

24738


 53%|█████▎    | 251/471 [23:22<20:44,  5.66s/it]

24508


 54%|█████▎    | 252/471 [23:28<20:45,  5.69s/it]

24552


 54%|█████▎    | 253/471 [23:34<20:44,  5.71s/it]

24705


 54%|█████▍    | 254/471 [23:39<20:42,  5.72s/it]

24722


 54%|█████▍    | 255/471 [23:45<20:37,  5.73s/it]

24462


 54%|█████▍    | 256/471 [23:51<20:33,  5.74s/it]

25024


 55%|█████▍    | 257/471 [23:57<20:28,  5.74s/it]

24770


 55%|█████▍    | 259/471 [24:02<15:34,  4.41s/it]

24884


 55%|█████▌    | 260/471 [24:08<16:37,  4.73s/it]

24811


 55%|█████▌    | 261/471 [24:14<17:26,  4.98s/it]

24763


 56%|█████▌    | 262/471 [24:19<18:01,  5.18s/it]

24870


 56%|█████▌    | 263/471 [24:25<18:27,  5.33s/it]

24918


 56%|█████▌    | 264/471 [24:31<18:43,  5.43s/it]

24979


 56%|█████▋    | 265/471 [24:37<18:54,  5.51s/it]

24848


 56%|█████▋    | 266/471 [24:42<19:00,  5.56s/it]

24880


 57%|█████▋    | 267/471 [24:48<19:02,  5.60s/it]

24841


 57%|█████▋    | 269/471 [24:54<14:37,  4.34s/it]

24790


 57%|█████▋    | 270/471 [24:59<15:39,  4.68s/it]

24864


 58%|█████▊    | 271/471 [25:05<16:27,  4.94s/it]

24804


 58%|█████▊    | 272/471 [25:11<17:03,  5.14s/it]

24789


 58%|█████▊    | 273/471 [25:16<17:30,  5.31s/it]

24885


 58%|█████▊    | 274/471 [25:22<17:47,  5.42s/it]

24891


 58%|█████▊    | 275/471 [25:28<17:57,  5.50s/it]

24917


 59%|█████▊    | 276/471 [25:34<18:03,  5.56s/it]

24915


 59%|█████▉    | 277/471 [25:39<18:07,  5.61s/it]

24932


 59%|█████▉    | 278/471 [25:45<18:08,  5.64s/it]

24905


 59%|█████▉    | 280/471 [25:51<13:53,  4.36s/it]

25022


 60%|█████▉    | 281/471 [25:56<14:52,  4.70s/it]

24824


 60%|█████▉    | 282/471 [26:02<15:36,  4.96s/it]

24867


 60%|██████    | 283/471 [26:08<16:09,  5.16s/it]

24893


 60%|██████    | 284/471 [26:13<16:33,  5.31s/it]

25315


 61%|██████    | 285/471 [26:19<16:44,  5.40s/it]

25552


 61%|██████    | 286/471 [26:25<16:52,  5.47s/it]

25197


 61%|██████    | 288/471 [26:30<13:02,  4.28s/it]

25526


 61%|██████▏   | 289/471 [26:36<13:58,  4.61s/it]

25180


 62%|██████▏   | 290/471 [26:42<14:42,  4.88s/it]

25079


 62%|██████▏   | 291/471 [26:47<15:14,  5.08s/it]

25219


 62%|██████▏   | 292/471 [26:53<15:37,  5.24s/it]

25231


 63%|██████▎   | 295/471 [26:59<10:00,  3.41s/it]

25124


 63%|██████▎   | 296/471 [27:04<11:16,  3.87s/it]

25435


 63%|██████▎   | 297/471 [27:10<12:22,  4.27s/it]

25572


 63%|██████▎   | 298/471 [27:16<13:16,  4.60s/it]

25300


 63%|██████▎   | 299/471 [27:21<13:57,  4.87s/it]

25122


 64%|██████▎   | 300/471 [27:27<14:28,  5.08s/it]

25322


 64%|██████▍   | 302/471 [27:32<11:31,  4.09s/it]

25250


 64%|██████▍   | 303/471 [27:38<12:28,  4.46s/it]

25285


 65%|██████▍   | 305/471 [27:44<10:29,  3.79s/it]

25408


 65%|██████▌   | 307/471 [27:49<09:23,  3.44s/it]

25372


 65%|██████▌   | 308/471 [27:55<10:34,  3.89s/it]

25141


 66%|██████▌   | 310/471 [28:01<09:22,  3.49s/it]

25520


 66%|██████▌   | 312/471 [28:06<08:38,  3.26s/it]

25144


 66%|██████▋   | 313/471 [28:12<09:49,  3.73s/it]

25414


 67%|██████▋   | 314/471 [28:17<10:51,  4.15s/it]

25573


 67%|██████▋   | 315/471 [28:23<11:42,  4.50s/it]

25560


 67%|██████▋   | 316/471 [28:29<12:25,  4.81s/it]

25508


 67%|██████▋   | 317/471 [28:34<12:54,  5.03s/it]

25279


 68%|██████▊   | 318/471 [28:40<13:14,  5.20s/it]

25200


 68%|██████▊   | 319/471 [28:46<13:28,  5.32s/it]

25121


 68%|██████▊   | 320/471 [28:51<13:36,  5.41s/it]

25325


 68%|██████▊   | 321/471 [28:57<13:41,  5.47s/it]

25333


 68%|██████▊   | 322/471 [29:03<13:42,  5.52s/it]

25162


 69%|██████▊   | 323/471 [29:08<13:42,  5.56s/it]

25264


 69%|██████▉   | 324/471 [29:14<13:40,  5.58s/it]

25331


 69%|██████▉   | 325/471 [29:20<13:36,  5.59s/it]

25349


 69%|██████▉   | 326/471 [29:25<13:33,  5.61s/it]

25130


 70%|██████▉   | 328/471 [29:31<10:18,  4.32s/it]

25701


 70%|██████▉   | 329/471 [29:36<10:57,  4.63s/it]

25634


 70%|███████   | 331/471 [29:42<09:01,  3.87s/it]

25619


 70%|███████   | 332/471 [29:47<09:51,  4.26s/it]

25710


 71%|███████   | 333/471 [29:53<10:32,  4.58s/it]

25632


 71%|███████   | 334/471 [29:59<11:03,  4.84s/it]

25631


 71%|███████   | 335/471 [30:04<11:25,  5.04s/it]

25847


 71%|███████▏  | 336/471 [30:10<11:40,  5.19s/it]

25610


 72%|███████▏  | 337/471 [30:15<11:50,  5.30s/it]

25605


 72%|███████▏  | 338/471 [30:21<11:55,  5.38s/it]

25855


 72%|███████▏  | 339/471 [30:27<11:57,  5.44s/it]

25614


 72%|███████▏  | 340/471 [30:32<11:57,  5.48s/it]

25809


 72%|███████▏  | 341/471 [30:38<11:56,  5.51s/it]

25687


 73%|███████▎  | 342/471 [30:43<11:54,  5.54s/it]

25674


 73%|███████▎  | 344/471 [30:49<09:03,  4.28s/it]

25802


 73%|███████▎  | 345/471 [30:54<09:39,  4.60s/it]

25671


 73%|███████▎  | 346/471 [31:00<10:07,  4.86s/it]

25705


 74%|███████▍  | 348/471 [31:06<08:08,  3.98s/it]

25824


 74%|███████▍  | 349/471 [31:11<08:51,  4.35s/it]

25787


 74%|███████▍  | 350/471 [31:17<09:23,  4.66s/it]

25833


 75%|███████▍  | 351/471 [31:22<09:48,  4.90s/it]

25857


 75%|███████▍  | 352/471 [31:28<10:05,  5.09s/it]

25630


 75%|███████▍  | 353/471 [31:34<10:15,  5.22s/it]

25616


 75%|███████▌  | 354/471 [31:39<10:22,  5.32s/it]

25613


 76%|███████▌  | 356/471 [31:45<08:00,  4.18s/it]

25654


 76%|███████▌  | 357/471 [31:50<08:35,  4.52s/it]

25589


 76%|███████▌  | 358/471 [31:56<09:02,  4.80s/it]

26132


 76%|███████▌  | 359/471 [32:01<09:18,  4.99s/it]

25931


 76%|███████▋  | 360/471 [32:07<09:29,  5.13s/it]

26331


 77%|███████▋  | 362/471 [32:12<07:24,  4.08s/it]

26136


 77%|███████▋  | 363/471 [32:18<07:57,  4.42s/it]

26160


 77%|███████▋  | 364/471 [32:23<08:22,  4.70s/it]

26038


 77%|███████▋  | 365/471 [32:29<08:41,  4.92s/it]

26259


 78%|███████▊  | 366/471 [32:34<08:53,  5.08s/it]

26067


 78%|███████▊  | 367/471 [32:40<09:00,  5.19s/it]

25885


 78%|███████▊  | 368/471 [32:45<09:03,  5.28s/it]

26074


 78%|███████▊  | 369/471 [32:51<09:05,  5.35s/it]

26284


 79%|███████▊  | 370/471 [32:56<09:05,  5.40s/it]

26002


 79%|███████▉  | 372/471 [33:02<06:54,  4.19s/it]

25940


 79%|███████▉  | 373/471 [33:07<07:22,  4.52s/it]

25957


 79%|███████▉  | 374/471 [33:13<07:43,  4.78s/it]

26012


 80%|███████▉  | 375/471 [33:18<07:57,  4.97s/it]

26178


 80%|███████▉  | 376/471 [33:24<08:06,  5.12s/it]

26194


 80%|████████  | 377/471 [33:29<08:11,  5.23s/it]

26163


 80%|████████  | 378/471 [33:35<08:13,  5.31s/it]

26240


 80%|████████  | 379/471 [33:40<08:13,  5.36s/it]

26317


 81%|████████  | 380/471 [33:46<08:11,  5.40s/it]

26174


 81%|████████  | 381/471 [33:51<08:08,  5.43s/it]

26095


 81%|████████  | 382/471 [33:57<08:05,  5.45s/it]

26021


 81%|████████▏ | 383/471 [34:02<08:00,  5.46s/it]

26024


 82%|████████▏ | 384/471 [34:08<07:56,  5.47s/it]

25860


 82%|████████▏ | 385/471 [34:13<07:51,  5.48s/it]

26129


 82%|████████▏ | 386/471 [34:19<07:46,  5.49s/it]

25991


 82%|████████▏ | 387/471 [34:24<07:42,  5.51s/it]

26084


 82%|████████▏ | 388/471 [34:30<07:37,  5.51s/it]

26027


 83%|████████▎ | 389/471 [34:36<07:32,  5.51s/it]

26537


 83%|████████▎ | 390/471 [34:41<07:25,  5.49s/it]

26489


 83%|████████▎ | 391/471 [34:46<07:18,  5.49s/it]

26545


 83%|████████▎ | 393/471 [34:52<05:28,  4.21s/it]

26526


 84%|████████▍ | 395/471 [34:57<04:35,  3.62s/it]

26409


 84%|████████▍ | 396/471 [35:03<05:01,  4.02s/it]

26437


 84%|████████▍ | 397/471 [35:08<05:22,  4.36s/it]

26411


 85%|████████▍ | 398/471 [35:14<05:38,  4.64s/it]

26560


 85%|████████▍ | 399/471 [35:19<05:49,  4.85s/it]

26601


 85%|████████▍ | 400/471 [35:25<05:56,  5.02s/it]

26532


 85%|████████▌ | 402/471 [35:30<04:36,  4.00s/it]

26405


 86%|████████▌ | 403/471 [35:35<04:55,  4.35s/it]

26585


 86%|████████▌ | 404/471 [35:41<05:09,  4.62s/it]

26491


 86%|████████▌ | 405/471 [35:46<05:19,  4.84s/it]

26514


 86%|████████▌ | 406/471 [35:52<05:25,  5.01s/it]

26749


 86%|████████▋ | 407/471 [35:57<05:27,  5.11s/it]

11427


 87%|████████▋ | 409/471 [36:02<04:09,  4.03s/it]

11436


 87%|████████▋ | 411/471 [36:08<03:30,  3.50s/it]

11387


 87%|████████▋ | 412/471 [36:13<03:50,  3.91s/it]

11290


 88%|████████▊ | 413/471 [36:19<04:07,  4.26s/it]

11314


 88%|████████▊ | 414/471 [36:24<04:19,  4.55s/it]

11206


 88%|████████▊ | 415/471 [36:29<04:27,  4.77s/it]

26738


 88%|████████▊ | 416/471 [36:35<04:31,  4.94s/it]

11378


 89%|████████▊ | 417/471 [36:40<04:33,  5.06s/it]

11219


 89%|████████▊ | 418/471 [36:45<04:32,  5.15s/it]

11205


 89%|████████▉ | 419/471 [36:51<04:30,  5.21s/it]

11216


 89%|████████▉ | 420/471 [36:56<04:28,  5.26s/it]

26649


 90%|████████▉ | 422/471 [37:02<03:20,  4.09s/it]

11325


 90%|████████▉ | 423/471 [37:07<03:31,  4.41s/it]

26729


 90%|█████████ | 425/471 [37:12<02:50,  3.70s/it]

11277


 91%|█████████ | 427/471 [37:18<02:26,  3.32s/it]

11279


 91%|█████████ | 428/471 [37:23<02:41,  3.75s/it]

11521


 92%|█████████▏| 431/471 [37:28<01:52,  2.81s/it]

11570


 92%|█████████▏| 432/471 [37:34<02:07,  3.27s/it]

11797


 92%|█████████▏| 433/471 [37:39<02:20,  3.70s/it]

11728


 93%|█████████▎| 436/471 [37:44<01:37,  2.79s/it]

11867


 93%|█████████▎| 437/471 [37:50<01:50,  3.25s/it]

11835


 93%|█████████▎| 438/471 [37:55<02:01,  3.68s/it]

11837


 93%|█████████▎| 439/471 [38:00<02:09,  4.05s/it]

11625


 93%|█████████▎| 440/471 [38:06<02:15,  4.36s/it]

11891


 94%|█████████▎| 441/471 [38:11<02:18,  4.61s/it]

11742


 94%|█████████▍| 442/471 [38:16<02:19,  4.80s/it]

11890


 94%|█████████▍| 443/471 [38:22<02:18,  4.94s/it]

11704


 94%|█████████▍| 444/471 [38:27<02:16,  5.05s/it]

11712


 94%|█████████▍| 445/471 [38:32<02:13,  5.13s/it]

11882


 95%|█████████▍| 446/471 [38:37<02:09,  5.18s/it]

11608


 95%|█████████▍| 447/471 [38:43<02:05,  5.22s/it]

11525


 95%|█████████▌| 448/471 [38:48<02:00,  5.25s/it]

11851


 96%|█████████▌| 450/471 [38:53<01:25,  4.06s/it]

11451


 96%|█████████▌| 451/471 [38:59<01:27,  4.37s/it]

12110


 96%|█████████▌| 452/471 [39:04<01:27,  4.60s/it]

12106


 96%|█████████▌| 453/471 [39:09<01:25,  4.77s/it]

12239


 96%|█████████▋| 454/471 [39:14<01:23,  4.91s/it]

11910


 97%|█████████▋| 455/471 [39:20<01:20,  5.01s/it]

12056


 97%|█████████▋| 456/471 [39:25<01:16,  5.08s/it]

12191


 97%|█████████▋| 457/471 [39:30<01:11,  5.13s/it]

12082


 97%|█████████▋| 458/471 [39:36<01:07,  5.17s/it]

12075


 97%|█████████▋| 459/471 [39:41<01:02,  5.19s/it]

12256


 98%|█████████▊| 460/471 [39:46<00:57,  5.21s/it]

12226


 98%|█████████▊| 461/471 [39:51<00:52,  5.22s/it]

12228


 98%|█████████▊| 462/471 [39:57<00:47,  5.23s/it]

12078


 98%|█████████▊| 463/471 [40:02<00:41,  5.24s/it]

11985


 99%|█████████▊| 464/471 [40:07<00:36,  5.24s/it]

12192


 99%|█████████▊| 465/471 [40:12<00:31,  5.24s/it]

12087


 99%|█████████▉| 466/471 [40:18<00:26,  5.24s/it]

12042


 99%|█████████▉| 467/471 [40:23<00:20,  5.25s/it]

12253


 99%|█████████▉| 468/471 [40:28<00:15,  5.25s/it]

12281


100%|██████████| 471/471 [40:33<00:00,  5.17s/it]


In [96]:
pickle.dump(result_dict_story_gen_just_emb_novel_method, open('25_result_dict_story_gen_just_emb_novel_method_v128.p', 'wb'))

In [97]:
# the proposed model without distributed similarity mechanism
seen_doc = set()
result_dict_story_gen_emb_tfidf_novel_method = dict()
for se in tqdm(seed_documents_for_experiments):
    if se in seen_doc:
        continue
    print(se)
    res = story_gen_emb_tfidf_novel_method(se, collect_ids)
    result_dict_story_gen_emb_tfidf_novel_method[se] = res
    for d in res:
        seen_doc.add(d)

  0%|          | 0/471 [00:00<?, ?it/s]

22989


  0%|          | 1/471 [02:30<19:36:42, 150.22s/it]

23079


  0%|          | 2/471 [04:55<19:10:19, 147.16s/it]

23002


  1%|          | 3/471 [07:46<20:33:08, 158.09s/it]

23124


  1%|          | 4/471 [09:43<18:23:35, 141.79s/it]

23131


  1%|          | 5/471 [12:16<18:53:55, 146.00s/it]

23119


  1%|▏         | 6/471 [15:02<19:42:43, 152.61s/it]

23192


  1%|▏         | 7/471 [18:21<21:38:54, 167.96s/it]

23066


  2%|▏         | 8/471 [21:11<21:40:18, 168.51s/it]

22988


  2%|▏         | 9/471 [24:10<22:02:41, 171.78s/it]

23159


  2%|▏         | 10/471 [27:55<24:06:40, 188.29s/it]

23107


  2%|▏         | 11/471 [31:40<25:30:01, 199.57s/it]

23022


  3%|▎         | 12/471 [34:33<24:23:53, 191.36s/it]

23045


  3%|▎         | 13/471 [36:50<22:14:33, 174.83s/it]

23110


  3%|▎         | 14/471 [39:52<22:30:00, 177.24s/it]

23084


  3%|▎         | 15/471 [41:56<20:23:51, 161.03s/it]

23017


  3%|▎         | 16/471 [44:27<19:59:21, 158.16s/it]

23036


  4%|▎         | 17/471 [47:37<21:09:24, 167.76s/it]

23023


  4%|▍         | 18/471 [50:02<20:14:01, 160.80s/it]

23005


  4%|▍         | 19/471 [52:49<20:25:07, 162.63s/it]

23123


  4%|▍         | 20/471 [56:29<22:32:32, 179.94s/it]

23061


  4%|▍         | 21/471 [59:19<22:06:28, 176.86s/it]

23121


  5%|▍         | 22/471 [1:02:50<23:21:05, 187.23s/it]

23073


  5%|▍         | 23/471 [1:04:54<20:56:45, 168.32s/it]

23020


  5%|▌         | 24/471 [1:08:19<22:15:51, 179.31s/it]

23163


  5%|▌         | 25/471 [1:10:40<20:47:03, 167.77s/it]

23177


  6%|▌         | 26/471 [1:13:54<21:42:40, 175.64s/it]

23077


  6%|▌         | 27/471 [1:17:22<22:51:31, 185.34s/it]

23130


  6%|▌         | 28/471 [1:20:45<23:26:06, 190.44s/it]

23188


  6%|▌         | 29/471 [1:24:08<23:51:21, 194.30s/it]

23054


  6%|▋         | 30/471 [1:27:23<23:49:54, 194.55s/it]

22990


  7%|▋         | 31/471 [1:29:29<21:16:24, 174.05s/it]

23189


  7%|▋         | 32/471 [1:32:49<22:10:22, 181.83s/it]

23115


  7%|▋         | 33/471 [1:35:31<21:22:52, 175.74s/it]

23040


  7%|▋         | 34/471 [1:38:16<20:57:01, 172.59s/it]

23013


  7%|▋         | 35/471 [1:40:19<19:06:12, 157.74s/it]

23185


  8%|▊         | 36/471 [1:42:29<18:03:29, 149.45s/it]

23126


  8%|▊         | 37/471 [1:45:43<19:36:16, 162.62s/it]

22997


  8%|▊         | 38/471 [1:48:39<20:03:56, 166.83s/it]

23060


  8%|▊         | 39/471 [1:51:27<20:02:33, 167.02s/it]

23041


  8%|▊         | 40/471 [1:53:49<19:07:28, 159.74s/it]

22986


  9%|▊         | 41/471 [1:57:08<20:29:10, 171.51s/it]

23004


  9%|▉         | 42/471 [2:00:00<20:25:45, 171.43s/it]

23137


  9%|▉         | 43/471 [2:02:41<20:00:31, 168.30s/it]

23057


  9%|▉         | 44/471 [2:05:20<19:38:29, 165.60s/it]

23136


 10%|▉         | 45/471 [2:08:39<20:48:06, 175.79s/it]

23068


 10%|▉         | 46/471 [2:10:43<18:53:26, 160.02s/it]

23064


 10%|▉         | 47/471 [2:14:29<21:10:20, 179.77s/it]

23146


 10%|█         | 48/471 [2:17:35<21:21:24, 181.76s/it]

23166


 10%|█         | 49/471 [2:20:09<20:20:06, 173.48s/it]

23055


 11%|█         | 50/471 [2:23:20<20:54:26, 178.78s/it]

23026


 11%|█         | 51/471 [2:26:33<21:20:07, 182.87s/it]

23018


 11%|█         | 52/471 [2:30:07<22:23:41, 192.41s/it]

22996


 11%|█▏        | 53/471 [2:32:06<19:45:27, 170.16s/it]

22998


 11%|█▏        | 54/471 [2:35:02<19:56:35, 172.17s/it]

23006


 12%|█▏        | 55/471 [2:37:34<19:10:54, 166.00s/it]

23167


 12%|█▏        | 56/471 [2:40:58<20:27:00, 177.40s/it]

22983


 12%|█▏        | 57/471 [2:44:29<21:32:41, 187.35s/it]

23080


 12%|█▏        | 58/471 [2:48:13<22:46:46, 198.56s/it]

23031


 13%|█▎        | 59/471 [2:51:01<21:40:08, 189.34s/it]

23035


 13%|█▎        | 60/471 [2:54:04<21:24:17, 187.49s/it]

22994


 13%|█▎        | 61/471 [2:56:20<19:35:09, 171.97s/it]

23147


 13%|█▎        | 62/471 [2:59:29<20:06:02, 176.93s/it]

23122


 13%|█▎        | 63/471 [3:01:30<18:10:06, 160.31s/it]

23104


 14%|█▎        | 64/471 [3:04:51<19:30:09, 172.51s/it]

23525


 14%|█▍        | 65/471 [3:07:04<18:06:02, 160.50s/it]

23565


 14%|█▍        | 66/471 [3:09:18<17:09:58, 152.59s/it]

23489


 14%|█▍        | 67/471 [3:13:14<19:56:42, 177.73s/it]

23312


 14%|█▍        | 68/471 [3:15:32<18:33:36, 165.80s/it]

23400


 15%|█▍        | 69/471 [3:18:56<19:48:15, 177.35s/it]

23344


 15%|█▍        | 70/471 [3:22:10<20:17:38, 182.19s/it]

23461


 15%|█▌        | 71/471 [3:25:19<20:27:36, 184.14s/it]

23331


 15%|█▌        | 72/471 [3:28:23<20:25:18, 184.26s/it]

23197


 15%|█▌        | 73/471 [3:31:53<21:13:06, 191.93s/it]

23298


 16%|█▌        | 74/471 [3:34:51<20:42:01, 187.71s/it]

23551


 16%|█▌        | 75/471 [3:36:49<18:20:52, 166.80s/it]

23510


 16%|█▌        | 76/471 [3:39:41<18:28:09, 168.33s/it]

23481


 16%|█▋        | 77/471 [3:42:44<18:55:51, 172.97s/it]

23548


 17%|█▋        | 78/471 [3:46:31<20:38:34, 189.10s/it]

23466


 17%|█▋        | 79/471 [3:50:03<21:20:39, 196.02s/it]

23474


 17%|█▋        | 80/471 [3:53:05<20:49:56, 191.81s/it]

23553


 17%|█▋        | 81/471 [3:55:31<19:17:15, 178.04s/it]

23431


 17%|█▋        | 82/471 [3:59:10<20:32:45, 190.14s/it]

23359


 18%|█▊        | 83/471 [4:02:25<20:39:52, 191.73s/it]

23206


 18%|█▊        | 84/471 [4:05:47<20:56:00, 194.73s/it]

23201


 18%|█▊        | 85/471 [4:09:23<21:34:04, 201.15s/it]

23395


 18%|█▊        | 86/471 [4:11:52<19:50:19, 185.50s/it]

23291


 18%|█▊        | 87/471 [4:14:27<18:48:00, 176.25s/it]

23569


 19%|█▊        | 88/471 [4:17:54<19:44:09, 185.51s/it]

23624


 19%|█▉        | 89/471 [4:21:29<20:38:41, 194.56s/it]

23511


 19%|█▉        | 90/471 [4:23:26<18:07:08, 171.20s/it]

23296


 19%|█▉        | 91/471 [4:26:23<18:15:37, 172.99s/it]

23397


 20%|█▉        | 92/471 [4:29:28<18:34:14, 176.40s/it]

23248


 20%|█▉        | 93/471 [4:32:17<18:18:37, 174.39s/it]

23570


 20%|█▉        | 94/471 [4:34:24<16:45:15, 159.99s/it]

23442


 20%|██        | 95/471 [4:36:41<16:00:01, 153.20s/it]

23460


 20%|██        | 96/471 [4:38:41<14:54:59, 143.20s/it]

23193


 21%|██        | 97/471 [4:41:41<16:01:11, 154.20s/it]

23608


 21%|██        | 98/471 [4:45:06<17:34:14, 169.58s/it]

23199


 21%|██        | 99/471 [4:48:26<18:26:54, 178.53s/it]

23550


 21%|██        | 100/471 [4:52:03<19:35:19, 190.08s/it]

23417


 21%|██▏       | 101/471 [4:54:31<18:14:18, 177.46s/it]

23266


 22%|██▏       | 102/471 [4:56:42<16:45:24, 163.48s/it]

23480


 22%|██▏       | 103/471 [4:59:28<16:47:25, 164.25s/it]

23576


 22%|██▏       | 104/471 [5:02:44<17:43:54, 173.93s/it]

23613


 22%|██▏       | 105/471 [5:06:16<18:49:59, 185.25s/it]

23581


 23%|██▎       | 106/471 [5:09:42<19:26:04, 191.68s/it]

23313


 23%|██▎       | 107/471 [5:12:30<18:39:03, 184.46s/it]

23618


 23%|██▎       | 108/471 [5:15:30<18:27:49, 183.11s/it]

23428


 23%|██▎       | 109/471 [5:19:00<19:12:25, 191.01s/it]

23383


 23%|██▎       | 110/471 [5:21:51<18:34:41, 185.27s/it]

23458


 24%|██▎       | 111/471 [5:25:02<18:41:39, 186.94s/it]

23401


 24%|██▍       | 112/471 [5:28:18<18:54:04, 189.54s/it]

23216


 24%|██▍       | 113/471 [5:31:29<18:54:15, 190.10s/it]

23468


 24%|██▍       | 114/471 [5:35:13<19:50:33, 200.09s/it]

23260


 24%|██▍       | 115/471 [5:38:23<19:29:45, 197.15s/it]

23443


 25%|██▍       | 116/471 [5:41:19<18:48:43, 190.77s/it]

23541


 25%|██▍       | 117/471 [5:44:34<18:53:34, 192.13s/it]

23282


 25%|██▌       | 118/471 [5:47:57<19:09:58, 195.46s/it]

23487


 25%|██▌       | 119/471 [5:51:39<19:53:11, 203.39s/it]

23252


 26%|██▌       | 121/471 [5:55:03<15:12:33, 156.44s/it]

23445


 26%|██▌       | 122/471 [5:58:04<15:45:56, 162.63s/it]

23337


 26%|██▌       | 123/471 [6:01:01<16:04:47, 166.34s/it]

23210


 26%|██▋       | 124/471 [6:04:10<16:37:30, 172.48s/it]

23603


 27%|██▋       | 125/471 [6:07:48<17:49:18, 185.43s/it]

23749


 27%|██▋       | 126/471 [6:10:56<17:49:45, 186.04s/it]

23723


 27%|██▋       | 127/471 [6:14:02<17:45:46, 185.89s/it]

23873


 27%|██▋       | 128/471 [6:17:21<18:05:37, 189.91s/it]

23776


 27%|██▋       | 129/471 [6:21:02<18:54:51, 199.10s/it]

23874


 28%|██▊       | 130/471 [6:24:52<19:43:54, 208.31s/it]

23664


 28%|██▊       | 131/471 [6:27:27<18:10:38, 192.47s/it]

23899


 28%|██▊       | 132/471 [6:30:22<17:37:51, 187.23s/it]

23887


 28%|██▊       | 133/471 [6:33:06<16:54:43, 180.13s/it]

23837


 28%|██▊       | 134/471 [6:36:24<17:21:44, 185.47s/it]

23854


 29%|██▊       | 135/471 [6:39:16<16:56:27, 181.51s/it]

23756


 29%|██▉       | 136/471 [6:42:28<17:10:45, 184.61s/it]

23810


 29%|██▉       | 137/471 [6:46:09<18:08:25, 195.53s/it]

23688


 29%|██▉       | 138/471 [6:48:59<17:22:23, 187.82s/it]

23902


 30%|██▉       | 139/471 [6:52:41<18:16:08, 198.10s/it]

23863


 30%|██▉       | 141/471 [6:55:41<13:35:14, 148.23s/it]

23721


 30%|███       | 142/471 [6:58:43<14:18:19, 156.53s/it]

23859


 30%|███       | 143/471 [7:02:22<15:45:08, 172.89s/it]

23841


 31%|███       | 144/471 [7:05:41<16:22:03, 180.19s/it]

23625


 31%|███       | 145/471 [7:08:48<16:28:43, 181.97s/it]

23833


 31%|███       | 146/471 [7:12:05<16:48:15, 186.14s/it]

23862


 31%|███       | 147/471 [7:15:23<17:04:52, 189.79s/it]

23634


 31%|███▏      | 148/471 [7:18:21<16:41:53, 186.11s/it]

23827


 32%|███▏      | 149/471 [7:21:36<16:52:59, 188.76s/it]

23631


 32%|███▏      | 150/471 [7:24:11<15:57:10, 178.91s/it]

23916


 32%|███▏      | 151/471 [7:27:16<16:04:19, 180.81s/it]

23895


 32%|███▏      | 152/471 [7:29:41<15:03:42, 169.98s/it]

23733


 32%|███▏      | 153/471 [7:33:31<16:36:46, 188.07s/it]

23788


 33%|███▎      | 155/471 [7:36:38<12:40:04, 144.32s/it]

23901


 33%|███▎      | 156/471 [7:39:47<13:36:32, 155.53s/it]

23638


 33%|███▎      | 157/471 [7:42:53<14:14:52, 163.35s/it]

23826


 34%|███▎      | 158/471 [7:45:51<14:33:43, 167.49s/it]

23643


 34%|███▍      | 159/471 [7:49:48<16:12:20, 186.99s/it]

23705


 34%|███▍      | 160/471 [7:53:16<16:38:57, 192.73s/it]

23875


 34%|███▍      | 161/471 [7:57:50<18:38:15, 216.44s/it]

23831


 34%|███▍      | 162/471 [8:01:01<17:56:38, 209.06s/it]

23809


 35%|███▍      | 163/471 [8:05:04<18:44:10, 218.99s/it]

23871


 35%|███▍      | 164/471 [8:08:18<18:02:27, 211.56s/it]

23828


 35%|███▌      | 165/471 [8:10:58<16:40:06, 196.10s/it]

23794


 35%|███▌      | 166/471 [8:14:23<16:51:00, 198.89s/it]

23777


 35%|███▌      | 167/471 [8:17:52<17:02:21, 201.78s/it]

23735


 36%|███▌      | 168/471 [8:21:55<18:02:11, 214.29s/it]

23896


 36%|███▌      | 169/471 [8:25:37<18:09:05, 216.37s/it]

23714


 36%|███▌      | 170/471 [8:28:40<17:16:01, 206.52s/it]

23829


 36%|███▋      | 171/471 [8:31:46<16:41:33, 200.31s/it]

24089


 37%|███▋      | 172/471 [8:35:07<16:40:01, 200.68s/it]

24025


 37%|███▋      | 173/471 [8:38:38<16:51:35, 203.68s/it]

24075


 37%|███▋      | 174/471 [8:41:55<16:37:59, 201.61s/it]

23941


 37%|███▋      | 175/471 [8:44:50<15:55:26, 193.67s/it]

24125


 37%|███▋      | 176/471 [8:47:51<15:34:08, 190.00s/it]

24061


 38%|███▊      | 177/471 [8:51:01<15:31:07, 190.03s/it]

24193


 38%|███▊      | 178/471 [8:53:50<14:56:32, 183.59s/it]

24026


 38%|███▊      | 179/471 [8:56:40<14:33:19, 179.45s/it]

24325


 38%|███▊      | 180/471 [8:59:26<14:10:43, 175.41s/it]

24303


 38%|███▊      | 181/471 [9:03:07<15:13:45, 189.05s/it]

24239


 39%|███▊      | 182/471 [9:06:37<15:41:02, 195.37s/it]

24148


 39%|███▉      | 183/471 [9:09:35<15:12:39, 190.14s/it]

24240


 39%|███▉      | 184/471 [9:13:08<15:42:49, 197.11s/it]

24013


 39%|███▉      | 185/471 [9:16:49<16:14:13, 204.38s/it]

24042


 39%|███▉      | 186/471 [9:19:13<14:44:06, 186.13s/it]

24329


 40%|███▉      | 187/471 [9:22:28<14:53:15, 188.72s/it]

24194


 40%|███▉      | 188/471 [9:25:23<14:31:38, 184.80s/it]

24132


 41%|████      | 191/471 [9:28:42<9:11:24, 118.16s/it] 

24316


 41%|████      | 192/471 [9:31:46<10:13:27, 131.93s/it]

24153


 41%|████      | 193/471 [9:35:48<12:09:45, 157.50s/it]

24146


 41%|████      | 194/471 [9:38:17<11:57:29, 155.41s/it]

24338


 41%|████▏     | 195/471 [9:41:53<13:07:27, 171.19s/it]

23990


 42%|████▏     | 197/471 [9:46:29<11:57:30, 157.12s/it]

23952


 42%|████▏     | 198/471 [9:49:29<12:19:24, 162.51s/it]

24180


 42%|████▏     | 199/471 [9:52:02<12:04:54, 159.91s/it]

24088


 42%|████▏     | 200/471 [9:54:59<12:23:23, 164.59s/it]

23954


 43%|████▎     | 201/471 [9:58:05<12:46:18, 170.29s/it]

24067


 43%|████▎     | 202/471 [10:00:40<12:24:19, 166.02s/it]

24190


 43%|████▎     | 203/471 [10:04:14<13:23:03, 179.79s/it]

24137


 43%|████▎     | 204/471 [10:06:42<12:39:05, 170.58s/it]

24271


 44%|████▎     | 205/471 [10:09:41<12:47:27, 173.11s/it]

24313


 44%|████▎     | 206/471 [10:12:43<12:56:38, 175.85s/it]

24158


 44%|████▍     | 207/471 [10:16:16<13:41:07, 186.62s/it]

23951


 44%|████▍     | 208/471 [10:21:00<15:44:58, 215.58s/it]

24219


 44%|████▍     | 209/471 [10:23:33<14:19:53, 196.92s/it]

23975


 45%|████▍     | 210/471 [10:26:28<13:49:10, 190.62s/it]

24169


 45%|████▍     | 211/471 [10:29:31<13:35:34, 188.21s/it]

24150


 45%|████▌     | 212/471 [10:32:18<13:04:29, 181.74s/it]

23969


 45%|████▌     | 213/471 [10:35:21<13:03:19, 182.17s/it]

24000


 45%|████▌     | 214/471 [10:38:28<13:07:21, 183.82s/it]

24633


 46%|████▌     | 215/471 [10:41:42<13:16:16, 186.63s/it]

24412


 46%|████▌     | 216/471 [10:44:53<13:19:48, 188.19s/it]

24610


 46%|████▌     | 217/471 [10:48:28<13:50:24, 196.16s/it]

24460


 46%|████▋     | 218/471 [10:51:42<13:44:08, 195.45s/it]

24608


 46%|████▋     | 219/471 [10:55:19<14:08:19, 201.98s/it]

24734


 47%|████▋     | 220/471 [10:59:06<14:36:16, 209.47s/it]

24729


 47%|████▋     | 221/471 [11:03:21<15:29:11, 223.01s/it]

24376


 47%|████▋     | 222/471 [11:06:16<14:25:22, 208.52s/it]

24446


 47%|████▋     | 223/471 [11:10:45<15:37:44, 226.87s/it]

24747


 48%|████▊     | 224/471 [11:14:17<15:15:02, 222.28s/it]

24352


 48%|████▊     | 225/471 [11:17:30<14:35:49, 213.62s/it]

24687


 48%|████▊     | 226/471 [11:20:32<13:53:27, 204.11s/it]

24396


 48%|████▊     | 227/471 [11:23:38<13:27:26, 198.55s/it]

24374


 48%|████▊     | 228/471 [11:26:38<13:01:56, 193.07s/it]

24361


 49%|████▊     | 229/471 [11:29:17<12:17:58, 182.97s/it]

24450


 49%|████▉     | 230/471 [11:32:25<12:20:08, 184.27s/it]

24419


 49%|████▉     | 231/471 [11:36:05<13:00:46, 195.20s/it]

24468


 49%|████▉     | 232/471 [11:38:34<12:02:08, 181.29s/it]

24696


 49%|████▉     | 233/471 [11:41:21<11:41:56, 176.96s/it]

24415


 50%|████▉     | 234/471 [11:44:50<12:17:27, 186.70s/it]

24509


 50%|████▉     | 235/471 [11:48:14<12:34:14, 191.76s/it]

24728


 50%|█████     | 236/471 [11:53:34<15:01:58, 230.29s/it]

24702


 50%|█████     | 237/471 [11:57:08<14:39:05, 225.41s/it]

24500


 51%|█████     | 238/471 [12:01:46<15:36:51, 241.25s/it]

24344


 51%|█████     | 239/471 [12:05:07<14:45:32, 229.02s/it]

24479


 51%|█████     | 240/471 [12:08:14<13:52:56, 216.35s/it]

24614


 51%|█████     | 241/471 [12:10:34<12:21:48, 193.52s/it]

24612


 51%|█████▏    | 242/471 [12:13:00<11:24:33, 179.36s/it]

24497


 52%|█████▏    | 243/471 [12:19:07<14:54:46, 235.47s/it]

24670


 52%|█████▏    | 244/471 [12:21:41<13:18:22, 211.03s/it]

24622


 52%|█████▏    | 245/471 [12:24:18<12:14:02, 194.88s/it]

24750


 52%|█████▏    | 246/471 [12:27:33<12:11:00, 194.94s/it]

24488


 52%|█████▏    | 247/471 [12:30:02<11:16:27, 181.19s/it]

24755


 53%|█████▎    | 248/471 [12:33:54<12:09:49, 196.37s/it]

24689


 53%|█████▎    | 249/471 [12:36:41<11:34:05, 187.59s/it]

24621


 53%|█████▎    | 250/471 [12:38:51<10:27:23, 170.33s/it]

24738


 53%|█████▎    | 251/471 [12:41:50<10:34:10, 172.96s/it]

24508


 54%|█████▎    | 252/471 [12:44:48<10:36:55, 174.50s/it]

24552


 54%|█████▎    | 253/471 [12:48:19<11:13:05, 185.25s/it]

24705


 54%|█████▍    | 254/471 [12:51:26<11:12:06, 185.84s/it]

24722


 54%|█████▍    | 255/471 [12:54:37<11:14:53, 187.47s/it]

24462


 54%|█████▍    | 256/471 [12:58:41<12:12:29, 204.41s/it]

25024


 55%|█████▍    | 257/471 [13:03:09<13:17:10, 223.51s/it]

24898


 55%|█████▍    | 258/471 [13:05:41<11:57:09, 202.02s/it]

24770


 55%|█████▍    | 259/471 [13:08:50<11:39:34, 197.99s/it]

24884


 55%|█████▌    | 260/471 [13:12:42<12:12:41, 208.35s/it]

24811


 55%|█████▌    | 261/471 [13:15:35<11:32:10, 197.76s/it]

24763


 56%|█████▌    | 262/471 [13:20:32<13:12:05, 227.40s/it]

24870


 56%|█████▌    | 263/471 [13:23:56<12:44:16, 220.47s/it]

24918


 56%|█████▌    | 264/471 [13:26:23<11:25:09, 198.59s/it]

24979


 56%|█████▋    | 265/471 [13:29:12<10:50:54, 189.58s/it]

24848


 56%|█████▋    | 266/471 [13:31:51<10:16:10, 180.35s/it]

24880


 57%|█████▋    | 267/471 [13:34:01<9:21:55, 165.27s/it] 

24841


 57%|█████▋    | 269/471 [13:36:32<6:56:41, 123.77s/it]

24790


 57%|█████▋    | 270/471 [13:39:51<7:57:09, 142.44s/it]

24864


 58%|█████▊    | 271/471 [13:42:36<8:14:48, 148.44s/it]

24804


 58%|█████▊    | 272/471 [13:46:01<9:02:49, 163.67s/it]

24789


 58%|█████▊    | 273/471 [13:48:49<9:04:51, 165.11s/it]

24885


 58%|█████▊    | 274/471 [13:52:26<9:50:54, 179.97s/it]

24891


 58%|█████▊    | 275/471 [13:54:55<9:18:04, 170.84s/it]

24917


 59%|█████▊    | 276/471 [13:58:02<9:30:24, 175.51s/it]

24915


 59%|█████▉    | 277/471 [14:01:11<9:40:09, 179.43s/it]

24932


 59%|█████▉    | 278/471 [14:04:36<10:01:52, 187.11s/it]

25036


 59%|█████▉    | 279/471 [14:07:49<10:04:54, 189.03s/it]

24905


 59%|█████▉    | 280/471 [14:10:40<9:44:41, 183.67s/it] 

25022


 60%|█████▉    | 281/471 [14:13:34<9:32:15, 180.71s/it]

24824


 60%|█████▉    | 282/471 [14:16:28<9:22:37, 178.61s/it]

24867


 60%|██████    | 283/471 [14:19:39<9:31:40, 182.45s/it]

24893


 60%|██████    | 284/471 [14:22:12<9:01:13, 173.65s/it]

25315


 61%|██████    | 285/471 [14:25:09<9:00:49, 174.46s/it]

25552


 61%|██████    | 286/471 [14:28:14<9:07:24, 177.54s/it]

25197


 61%|██████    | 288/471 [14:31:24<7:05:21, 139.46s/it]

25526


 61%|██████▏   | 289/471 [14:34:21<7:31:24, 148.81s/it]

25180


 62%|██████▏   | 290/471 [14:37:37<8:06:07, 161.15s/it]

25079


 62%|██████▏   | 291/471 [14:40:37<8:18:45, 166.25s/it]

25219


 62%|██████▏   | 292/471 [14:43:41<8:31:12, 171.36s/it]

25135


 62%|██████▏   | 293/471 [14:47:22<9:10:02, 185.41s/it]

25184


 62%|██████▏   | 294/471 [14:49:35<8:22:30, 170.34s/it]

25231


 63%|██████▎   | 295/471 [14:52:13<8:09:07, 166.75s/it]

25124


 63%|██████▎   | 296/471 [14:54:53<8:00:40, 164.80s/it]

25435


 63%|██████▎   | 297/471 [14:58:13<8:27:59, 175.17s/it]

25572


 63%|██████▎   | 298/471 [15:01:56<9:06:13, 189.44s/it]

25300


 63%|██████▎   | 299/471 [15:05:12<9:08:50, 191.46s/it]

25122


 64%|██████▎   | 300/471 [15:08:47<9:25:11, 198.32s/it]

25176


 64%|██████▍   | 301/471 [15:11:45<9:04:49, 192.29s/it]

25322


 64%|██████▍   | 302/471 [15:15:23<9:23:21, 200.01s/it]

25250


 64%|██████▍   | 303/471 [15:18:46<9:22:18, 200.82s/it]

25133


 65%|██████▍   | 304/471 [15:22:08<9:20:23, 201.34s/it]

25285


 65%|██████▍   | 305/471 [15:24:43<8:38:01, 187.24s/it]

25177


 65%|██████▍   | 306/471 [15:27:39<8:25:37, 183.87s/it]

25408


 65%|██████▌   | 307/471 [15:30:26<8:09:21, 179.03s/it]

25372


 65%|██████▌   | 308/471 [15:33:51<8:27:20, 186.75s/it]

25194


 66%|██████▌   | 309/471 [15:36:57<8:23:44, 186.57s/it]

25141


 66%|██████▌   | 310/471 [15:39:59<8:16:20, 184.97s/it]

25207


 66%|██████▌   | 311/471 [15:42:25<7:42:06, 173.29s/it]

25520


 66%|██████▌   | 312/471 [15:45:37<7:54:30, 179.06s/it]

25144


 66%|██████▋   | 313/471 [15:48:32<7:48:20, 177.85s/it]

25414


 67%|██████▋   | 314/471 [15:51:27<7:43:20, 177.07s/it]

25573


 67%|██████▋   | 315/471 [15:54:46<7:57:01, 183.47s/it]

25560


 67%|██████▋   | 316/471 [15:57:58<8:00:57, 186.18s/it]

25508


 67%|██████▋   | 317/471 [16:01:06<7:59:17, 186.74s/it]

25279


 68%|██████▊   | 318/471 [16:05:17<8:45:20, 206.01s/it]

25200


 68%|██████▊   | 319/471 [16:08:12<8:17:49, 196.51s/it]

25121


 68%|██████▊   | 320/471 [16:10:38<7:36:55, 181.56s/it]

25325


 68%|██████▊   | 321/471 [16:13:52<7:43:04, 185.23s/it]

25333


 68%|██████▊   | 322/471 [16:16:17<7:10:02, 173.17s/it]

25162


 69%|██████▊   | 323/471 [16:18:50<6:52:15, 167.13s/it]

25264


 69%|██████▉   | 324/471 [16:22:42<7:37:00, 186.53s/it]

25331


 69%|██████▉   | 325/471 [16:26:16<7:53:55, 194.76s/it]

25349


 69%|██████▉   | 326/471 [16:28:42<7:15:02, 180.02s/it]

25418


 69%|██████▉   | 327/471 [16:32:37<7:52:12, 196.75s/it]

25130


 70%|██████▉   | 328/471 [16:35:35<7:35:21, 191.06s/it]

25701


 70%|██████▉   | 329/471 [16:38:52<7:35:57, 192.66s/it]

25856


 70%|███████   | 330/471 [16:41:42<7:17:18, 186.09s/it]

25634


 70%|███████   | 331/471 [16:45:16<7:33:37, 194.41s/it]

25619


 70%|███████   | 332/471 [16:48:24<7:25:44, 192.41s/it]

25710


 71%|███████   | 333/471 [16:52:21<7:53:08, 205.71s/it]

25632


 71%|███████   | 334/471 [16:57:42<9:08:59, 240.43s/it]

25631


 71%|███████   | 335/471 [17:01:17<8:47:54, 232.90s/it]

25847


 71%|███████▏  | 336/471 [17:04:12<8:04:55, 215.52s/it]

25610


 72%|███████▏  | 337/471 [17:07:03<7:31:10, 202.02s/it]

25605


 72%|███████▏  | 338/471 [17:09:09<6:37:17, 179.23s/it]

25855


 72%|███████▏  | 339/471 [17:11:57<6:27:06, 175.96s/it]

25614


 72%|███████▏  | 340/471 [17:16:50<7:40:25, 210.88s/it]

25809


 72%|███████▏  | 341/471 [17:20:06<7:27:24, 206.49s/it]

25687


 73%|███████▎  | 342/471 [17:22:51<6:57:05, 193.99s/it]

25766


 73%|███████▎  | 343/471 [17:25:39<6:37:04, 186.13s/it]

25674


 73%|███████▎  | 344/471 [17:29:06<6:47:14, 192.39s/it]

25802


 73%|███████▎  | 345/471 [17:31:55<6:29:30, 185.48s/it]

25671


 73%|███████▎  | 346/471 [17:34:35<6:10:32, 177.86s/it]

25590


 74%|███████▎  | 347/471 [17:37:02<5:48:33, 168.66s/it]

25705


 74%|███████▍  | 348/471 [17:39:41<5:40:00, 165.85s/it]

25824


 74%|███████▍  | 349/471 [17:42:31<5:39:23, 166.92s/it]

25787


 74%|███████▍  | 350/471 [17:45:01<5:26:44, 162.02s/it]

25833


 75%|███████▍  | 351/471 [17:48:21<5:46:35, 173.30s/it]

25857


 75%|███████▍  | 352/471 [17:51:09<5:40:14, 171.55s/it]

25630


 75%|███████▍  | 353/471 [17:55:49<6:41:26, 204.12s/it]

25616


 75%|███████▌  | 354/471 [17:59:08<6:35:08, 202.63s/it]

25773


 75%|███████▌  | 355/471 [18:02:41<6:37:37, 205.67s/it]

25613


 76%|███████▌  | 356/471 [18:05:16<6:05:06, 190.49s/it]

25654


 76%|███████▌  | 357/471 [18:07:53<5:43:05, 180.58s/it]

25589


 76%|███████▌  | 358/471 [18:10:41<5:33:02, 176.84s/it]

25931


 76%|███████▋  | 360/471 [18:13:46<4:14:54, 137.78s/it]

26159


 77%|███████▋  | 361/471 [18:16:00<4:11:07, 136.97s/it]

26331


 77%|███████▋  | 362/471 [18:19:03<4:30:47, 149.06s/it]

26136


 77%|███████▋  | 363/471 [18:21:50<4:36:54, 153.84s/it]

26160


 77%|███████▋  | 364/471 [18:25:06<4:55:12, 165.54s/it]

26038


 77%|███████▋  | 365/471 [18:28:47<5:20:28, 181.40s/it]

26259


 78%|███████▊  | 366/471 [18:32:03<5:24:46, 185.58s/it]

26067


 78%|███████▊  | 367/471 [18:34:16<4:55:08, 170.27s/it]

25885


 78%|███████▊  | 368/471 [18:37:12<4:55:13, 171.98s/it]

26074


 78%|███████▊  | 369/471 [18:41:24<5:32:38, 195.67s/it]

26284


 79%|███████▊  | 370/471 [18:44:12<5:15:29, 187.42s/it]

26291


 79%|███████▉  | 371/471 [18:47:22<5:13:45, 188.26s/it]

26002


 79%|███████▉  | 372/471 [18:50:06<4:58:35, 180.96s/it]

25940


 79%|███████▉  | 373/471 [18:53:40<5:12:01, 191.04s/it]

26012


 80%|███████▉  | 375/471 [18:56:37<3:49:51, 143.66s/it]

26178


 80%|███████▉  | 376/471 [18:59:40<4:03:01, 153.49s/it]

26194


 80%|████████  | 377/471 [19:02:35<4:09:11, 159.06s/it]

26163


 80%|████████  | 378/471 [19:05:55<4:23:45, 170.17s/it]

26240


 80%|████████  | 379/471 [19:08:30<4:14:36, 166.05s/it]

26317


 81%|████████  | 380/471 [19:11:09<4:08:44, 164.00s/it]

26174


 81%|████████  | 381/471 [19:14:05<4:11:01, 167.35s/it]

26095


 81%|████████  | 382/471 [19:17:31<4:25:14, 178.81s/it]

26021


 81%|████████▏ | 383/471 [19:20:11<4:13:59, 173.17s/it]

26024


 82%|████████▏ | 384/471 [19:22:50<4:04:57, 168.94s/it]

25860


 82%|████████▏ | 385/471 [19:26:30<4:24:07, 184.27s/it]

26129


 82%|████████▏ | 386/471 [19:30:44<4:50:15, 204.89s/it]

25991


 82%|████████▏ | 387/471 [19:33:48<4:38:06, 198.64s/it]

26084


 82%|████████▏ | 388/471 [19:36:44<4:25:41, 192.07s/it]

26027


 83%|████████▎ | 389/471 [19:41:09<4:52:00, 213.66s/it]

26537


 83%|████████▎ | 390/471 [19:44:22<4:40:02, 207.44s/it]

26489


 83%|████████▎ | 391/471 [19:48:54<5:02:24, 226.81s/it]

26589


 83%|████████▎ | 392/471 [19:51:17<4:25:48, 201.88s/it]

26545


 83%|████████▎ | 393/471 [19:54:22<4:15:47, 196.76s/it]

26394


 84%|████████▎ | 394/471 [19:57:40<4:12:59, 197.14s/it]

26526


 84%|████████▍ | 395/471 [20:01:27<4:21:00, 206.06s/it]

26409


 84%|████████▍ | 396/471 [20:05:03<4:21:25, 209.14s/it]

26437


 84%|████████▍ | 397/471 [20:08:02<4:06:43, 200.05s/it]

26411


 85%|████████▍ | 398/471 [20:11:12<3:59:38, 196.96s/it]

26560


 85%|████████▍ | 399/471 [20:14:07<3:48:37, 190.53s/it]

26601


 85%|████████▍ | 400/471 [20:17:46<3:55:19, 198.86s/it]

26593


 85%|████████▌ | 401/471 [20:20:19<3:36:00, 185.15s/it]

26532


 85%|████████▌ | 402/471 [20:22:44<3:19:01, 173.06s/it]

26405


 86%|████████▌ | 403/471 [20:25:58<3:23:14, 179.33s/it]

26491


 86%|████████▌ | 405/471 [20:29:04<2:33:32, 139.58s/it]

26514


 86%|████████▌ | 406/471 [20:31:56<2:39:57, 147.66s/it]

26749


 86%|████████▋ | 407/471 [20:34:48<2:44:17, 154.02s/it]

26662


 87%|████████▋ | 408/471 [20:36:50<2:32:34, 145.30s/it]

11427


 87%|████████▋ | 409/471 [20:39:17<2:30:37, 145.77s/it]

26685


 87%|████████▋ | 410/471 [20:41:17<2:20:36, 138.31s/it]

11436


 87%|████████▋ | 411/471 [20:44:44<2:38:10, 158.18s/it]

11387


 87%|████████▋ | 412/471 [20:47:30<2:37:55, 160.60s/it]

11290


 88%|████████▊ | 413/471 [20:50:23<2:38:51, 164.34s/it]

11314


 88%|████████▊ | 414/471 [20:52:40<2:28:15, 156.07s/it]

11206


 88%|████████▊ | 415/471 [20:55:39<2:32:04, 162.94s/it]

26738


 88%|████████▊ | 416/471 [20:58:17<2:27:57, 161.41s/it]

11378


 89%|████████▊ | 417/471 [21:02:17<2:46:25, 184.91s/it]

11219


 89%|████████▊ | 418/471 [21:05:23<2:43:36, 185.21s/it]

11216


 89%|████████▉ | 420/471 [21:08:22<1:59:53, 141.05s/it]

11224


 89%|████████▉ | 421/471 [21:10:58<2:00:41, 144.84s/it]

26649


 90%|████████▉ | 422/471 [21:13:46<2:03:17, 150.98s/it]

11325


 90%|████████▉ | 423/471 [21:16:43<2:06:19, 157.90s/it]

11283


 90%|█████████ | 424/471 [21:19:10<2:01:25, 155.00s/it]

26729


 90%|█████████ | 425/471 [21:22:25<2:07:33, 166.37s/it]

11277


 91%|█████████ | 427/471 [21:25:08<1:33:57, 128.13s/it]

11279


 91%|█████████ | 428/471 [21:28:16<1:42:19, 142.78s/it]

11388


 91%|█████████ | 429/471 [21:31:43<1:51:35, 159.41s/it]

11447


 91%|█████████▏| 430/471 [21:35:17<1:58:54, 174.00s/it]

11521


 92%|█████████▏| 431/471 [21:37:54<1:52:54, 169.37s/it]

11570


 92%|█████████▏| 432/471 [21:41:23<1:57:21, 180.55s/it]

11797


 92%|█████████▏| 433/471 [21:44:15<1:52:45, 178.05s/it]

11857


 92%|█████████▏| 435/471 [21:47:05<1:21:35, 135.97s/it]

11728


 93%|█████████▎| 436/471 [21:50:05<1:25:40, 146.86s/it]

11867


 93%|█████████▎| 437/471 [21:52:12<1:20:18, 141.72s/it]

11835


 93%|█████████▎| 438/471 [21:55:37<1:27:17, 158.73s/it]

11837


 93%|█████████▎| 439/471 [21:58:14<1:24:23, 158.23s/it]

11625


 93%|█████████▎| 440/471 [22:01:15<1:25:05, 164.70s/it]

11891


 94%|█████████▎| 441/471 [22:04:03<1:22:51, 165.73s/it]

11742


 94%|█████████▍| 442/471 [22:06:45<1:19:28, 164.44s/it]

11890


 94%|█████████▍| 443/471 [22:09:46<1:19:01, 169.32s/it]

11704


 94%|█████████▍| 444/471 [22:12:42<1:17:07, 171.41s/it]

11712


 94%|█████████▍| 445/471 [22:14:51<1:08:46, 158.73s/it]

11882


 95%|█████████▍| 446/471 [22:17:52<1:08:53, 165.32s/it]

11608


 95%|█████████▍| 447/471 [22:20:18<1:03:52, 159.69s/it]

11525


 95%|█████████▌| 448/471 [22:23:33<1:05:17, 170.34s/it]

11720


 95%|█████████▌| 449/471 [22:26:33<1:03:28, 173.09s/it]

11851


 96%|█████████▌| 450/471 [22:29:42<1:02:14, 177.84s/it]

11451


 96%|█████████▌| 451/471 [22:32:10<56:18, 168.90s/it]  

12110


 96%|█████████▌| 452/471 [22:36:02<59:29, 187.84s/it]

12106


 96%|█████████▌| 453/471 [22:39:01<55:32, 185.16s/it]

12239


 96%|█████████▋| 454/471 [22:41:36<49:55, 176.23s/it]

11910


 97%|█████████▋| 455/471 [22:44:59<49:04, 184.03s/it]

12056


 97%|█████████▋| 456/471 [22:47:43<44:33, 178.22s/it]

12191


 97%|█████████▋| 457/471 [22:50:22<40:13, 172.40s/it]

12075


 97%|█████████▋| 459/471 [22:53:58<28:30, 142.57s/it]

12256


 98%|█████████▊| 460/471 [22:56:38<26:56, 146.99s/it]

12226


 98%|█████████▊| 461/471 [22:58:50<23:49, 142.96s/it]

12228


 98%|█████████▊| 462/471 [23:02:23<24:18, 162.01s/it]

12078


 98%|█████████▊| 463/471 [23:05:18<22:05, 165.73s/it]

11985


 99%|█████████▊| 464/471 [23:08:12<19:37, 168.22s/it]

12192


 99%|█████████▊| 465/471 [23:10:44<16:20, 163.47s/it]

12087


 99%|█████████▉| 466/471 [23:17:12<19:05, 229.12s/it]

12042


 99%|█████████▉| 467/471 [23:20:07<14:12, 213.17s/it]

12029


100%|█████████▉| 469/471 [23:21:48<04:37, 138.89s/it]

12074


100%|█████████▉| 470/471 [23:23:47<02:13, 133.81s/it]

12281


100%|██████████| 471/471 [23:26:33<00:00, 179.18s/it]


In [98]:
pickle.dump(result_dict_story_gen_emb_tfidf_novel_method, open('25_result_dict_story_gen_emb_tfidf_novel_method_v128.p', 'wb'))

In [148]:
# this function recieves a seed and generats the relevant story to that seed without using diffusion component
def story_gen_emb_tfidf_novel_method_without_diffusion(seed_, collect_ids, min_size_generated_data_per_ts=50, min_acceptable_similarity= 0.9, min_keyword_overlapping= 0.60):
    ###SEARCH ALGORITHM       
    starting_ts = ts_finder(seed_, collect_ids)        
    seed_doc = seed_
    storyline = []
    storyline.append(seed_doc)
    storyline_keywords = keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100)
    for ts_ids in list(collect_ids.keys())[starting_ts+1:]:
        #preparing conditions and noises for diffusion model
        
        data_ids = [w[0] for w in data_timestamps[ts_ids]]
        seed_embedding = contextual_dict[seed_doc]
        # temp = list()
        candidate_docs_id = collect_ids[ts_ids]
        candidate_docs_tf_idf = [keyword_extractor(tf_idf_dict[w], list_of_words_in_tfidf, 100) for w in candidate_docs_id]
        candidate_scores = [keyword_similarity(w,storyline_keywords) for w in candidate_docs_tf_idf]
        candidates = [candidate_docs_id[i] for i in range(len(candidate_docs_id)) if candidate_scores[i] > 0.50]
        if len(candidates) < 1:
        #there is no document in the timestamp with the minimum acceptable keyword overlap
            continue
        picked_generated_emb = seed_doc
        candidate_vs_generated_cosine_similarity = [distributed_similarity_(storyline, storyline_keywords, w, contextual_dict[picked_generated_emb]) for w in candidates]
        # print(candidate_vs_generated_cosine_similarity)
        max_candidate_sim = max(candidate_vs_generated_cosine_similarity)
        if max_candidate_sim < 0.5:
                # the best candidate is still not similar to the previous document. going to the next ts
            continue
        seed_doc = candidates[candidate_vs_generated_cosine_similarity.index(max_candidate_sim)]
        # add the new pick into the soryline
        storyline.append(seed_doc)
        # update the storyline_keywords
        merge_keywords(storyline_keywords, keyword_extractor(tf_idf_dict[seed_doc], list_of_words_in_tfidf, 100), 0.9)
        
    return(storyline)

In [149]:
# the proposed model without diffusion model
seen_doc = set()
result_dict_story_gen_emb_tfidf_novel_method_without_diffusion = dict()
for se in tqdm(seed_documents_for_experiments):
    if se in seen_doc:
        continue
    print(se)
    res = story_gen_emb_tfidf_novel_method_without_diffusion(se, collect_ids)
    result_dict_story_gen_emb_tfidf_novel_method_without_diffusion[se] = res
    for d in res:
        seen_doc.add(d)

  0%|          | 0/471 [00:00<?, ?it/s]

22989


  0%|          | 1/471 [00:59<7:48:01, 59.75s/it]

23079


  0%|          | 2/471 [01:57<7:38:52, 58.71s/it]

23002


  1%|          | 3/471 [02:55<7:35:33, 58.41s/it]

23124


  1%|          | 4/471 [03:53<7:32:51, 58.18s/it]

23131


  1%|          | 5/471 [04:52<7:33:48, 58.43s/it]

23119


  1%|▏         | 6/471 [05:51<7:33:38, 58.53s/it]

23192


  1%|▏         | 7/471 [07:07<8:17:16, 64.30s/it]

23066


  2%|▏         | 8/471 [08:12<8:18:23, 64.59s/it]

22988


  2%|▏         | 9/471 [09:10<8:00:10, 62.36s/it]

23159


  2%|▏         | 10/471 [10:30<8:43:03, 68.08s/it]

23107


  2%|▏         | 11/471 [11:35<8:32:35, 66.86s/it]

23022


  3%|▎         | 12/471 [12:33<8:11:22, 64.23s/it]

23045


  3%|▎         | 13/471 [13:30<7:55:13, 62.26s/it]

23110


  3%|▎         | 14/471 [14:30<7:47:56, 61.44s/it]

23084


  3%|▎         | 15/471 [15:28<7:38:38, 60.35s/it]

23017


  3%|▎         | 16/471 [16:27<7:34:19, 59.91s/it]

23036


  4%|▎         | 17/471 [17:45<8:14:01, 65.29s/it]

23023


  4%|▍         | 18/471 [18:50<8:12:50, 65.28s/it]

23005


  4%|▍         | 19/471 [19:58<8:18:08, 66.12s/it]

23123


  4%|▍         | 20/471 [20:59<8:06:23, 64.71s/it]

23061


  4%|▍         | 21/471 [21:59<7:53:13, 63.10s/it]

23121


  5%|▍         | 22/471 [22:58<7:43:34, 61.95s/it]

23073


  5%|▍         | 23/471 [23:56<7:34:17, 60.84s/it]

23020


  5%|▌         | 24/471 [25:03<7:47:22, 62.73s/it]

23163


  5%|▌         | 25/471 [26:02<7:36:48, 61.45s/it]

23177


  6%|▌         | 26/471 [27:29<8:32:06, 69.05s/it]

23077


  6%|▌         | 27/471 [28:45<8:46:43, 71.18s/it]

23130


  6%|▌         | 28/471 [29:43<8:17:56, 67.44s/it]

23188


  6%|▌         | 29/471 [30:46<8:07:00, 66.11s/it]

23054


  6%|▋         | 30/471 [31:59<8:21:12, 68.19s/it]

22990


  7%|▋         | 31/471 [32:57<7:56:59, 65.04s/it]

23189


  7%|▋         | 32/471 [34:12<8:16:23, 67.84s/it]

23115


  7%|▋         | 33/471 [35:19<8:15:18, 67.85s/it]

23040


  7%|▋         | 34/471 [36:26<8:10:28, 67.34s/it]

23013


  7%|▋         | 35/471 [37:28<7:59:35, 66.00s/it]

23185


  8%|▊         | 36/471 [38:33<7:54:20, 65.43s/it]

23126


  8%|▊         | 37/471 [39:46<8:10:28, 67.81s/it]

22997


  8%|▊         | 38/471 [40:46<7:53:00, 65.54s/it]

23060


  8%|▊         | 39/471 [41:49<7:46:49, 64.84s/it]

23041


  8%|▊         | 40/471 [42:47<7:30:15, 62.68s/it]

22986


  9%|▊         | 41/471 [44:01<7:52:35, 65.94s/it]

23004


  9%|▉         | 42/471 [45:01<7:39:31, 64.27s/it]

23137


  9%|▉         | 43/471 [46:01<7:30:01, 63.09s/it]

23057


  9%|▉         | 44/471 [46:59<7:16:54, 61.39s/it]

23136


 10%|▉         | 45/471 [48:12<7:42:03, 65.08s/it]

23068


 10%|▉         | 46/471 [49:10<7:24:15, 62.72s/it]

23064


 10%|▉         | 47/471 [50:22<7:44:10, 65.69s/it]

23146


 10%|█         | 48/471 [51:27<7:40:49, 65.37s/it]

23166


 10%|█         | 49/471 [52:25<7:24:29, 63.20s/it]

23055


 11%|█         | 50/471 [53:23<7:13:05, 61.72s/it]

23026


 11%|█         | 51/471 [54:26<7:13:57, 61.99s/it]

23018


 11%|█         | 52/471 [55:24<7:05:51, 60.98s/it]

22996


 11%|█▏        | 53/471 [56:23<7:00:25, 60.35s/it]

22998


 11%|█▏        | 54/471 [57:23<6:59:01, 60.29s/it]

23006


 12%|█▏        | 55/471 [58:21<6:52:05, 59.44s/it]

23167


 12%|█▏        | 56/471 [59:23<6:57:29, 60.36s/it]

22983


 12%|█▏        | 57/471 [1:00:22<6:52:41, 59.81s/it]

23080


 12%|█▏        | 58/471 [1:01:25<6:58:42, 60.83s/it]

23031


 13%|█▎        | 59/471 [1:02:25<6:56:24, 60.64s/it]

23035


 13%|█▎        | 60/471 [1:03:26<6:55:11, 60.61s/it]

22994


 13%|█▎        | 61/471 [1:04:24<6:48:52, 59.83s/it]

23147


 13%|█▎        | 62/471 [1:05:45<7:31:32, 66.24s/it]

23122


 13%|█▎        | 63/471 [1:06:44<7:15:31, 64.05s/it]

23104


 14%|█▎        | 64/471 [1:07:48<7:14:49, 64.10s/it]

23525


 14%|█▍        | 65/471 [1:08:46<7:00:20, 62.12s/it]

23565


 14%|█▍        | 66/471 [1:09:42<6:48:10, 60.47s/it]

23312


 14%|█▍        | 68/471 [1:10:43<5:12:06, 46.47s/it]

23400


 15%|█▍        | 69/471 [1:12:04<6:09:57, 55.22s/it]

23461


 15%|█▌        | 71/471 [1:13:13<5:10:17, 46.54s/it]

23331


 15%|█▌        | 72/471 [1:14:13<5:30:34, 49.71s/it]

23197


 15%|█▌        | 73/471 [1:15:24<6:03:11, 54.75s/it]

23298


 16%|█▌        | 74/471 [1:16:31<6:23:46, 58.00s/it]

23551


 16%|█▌        | 75/471 [1:17:33<6:29:31, 59.02s/it]

23510


 16%|█▌        | 76/471 [1:18:32<6:28:28, 59.01s/it]

23481


 16%|█▋        | 77/471 [1:19:33<6:31:56, 59.69s/it]

23466


 17%|█▋        | 79/471 [1:20:31<5:00:05, 45.93s/it]

23474


 17%|█▋        | 80/471 [1:21:43<5:39:51, 52.15s/it]

23553


 17%|█▋        | 81/471 [1:22:42<5:50:34, 53.94s/it]

23431


 17%|█▋        | 82/471 [1:23:38<5:54:12, 54.63s/it]

23359


 18%|█▊        | 83/471 [1:24:51<6:26:16, 59.73s/it]

23206


 18%|█▊        | 84/471 [1:25:58<6:38:57, 61.85s/it]

23201


 18%|█▊        | 85/471 [1:26:59<6:36:11, 61.59s/it]

23395


 18%|█▊        | 86/471 [1:27:57<6:28:05, 60.48s/it]

23291


 18%|█▊        | 87/471 [1:28:55<6:22:23, 59.75s/it]

23569


 19%|█▊        | 88/471 [1:30:00<6:30:24, 61.16s/it]

23624


 19%|█▉        | 89/471 [1:31:20<7:05:13, 66.79s/it]

23511


 19%|█▉        | 90/471 [1:32:20<6:51:11, 64.75s/it]

23296


 19%|█▉        | 91/471 [1:33:26<6:52:02, 65.06s/it]

23397


 20%|█▉        | 92/471 [1:34:31<6:52:09, 65.25s/it]

23570


 20%|█▉        | 94/471 [1:35:28<5:03:38, 48.33s/it]

23442


 20%|██        | 95/471 [1:36:27<5:19:34, 50.99s/it]

23460


 20%|██        | 96/471 [1:37:25<5:29:46, 52.76s/it]

23193


 21%|██        | 97/471 [1:38:25<5:40:35, 54.64s/it]

23199


 21%|██        | 99/471 [1:39:25<4:32:44, 43.99s/it]

23550


 21%|██        | 100/471 [1:40:23<4:51:47, 47.19s/it]

23417


 21%|██▏       | 101/471 [1:41:20<5:07:05, 49.80s/it]

23266


 22%|██▏       | 102/471 [1:42:20<5:22:21, 52.42s/it]

23480


 22%|██▏       | 103/471 [1:43:20<5:34:42, 54.57s/it]

23576


 22%|██▏       | 104/471 [1:44:47<6:28:59, 63.59s/it]

23613


 22%|██▏       | 105/471 [1:45:50<6:28:19, 63.66s/it]

23581


 23%|██▎       | 106/471 [1:46:49<6:17:18, 62.02s/it]

23313


 23%|██▎       | 107/471 [1:47:51<6:17:02, 62.15s/it]

23618


 23%|██▎       | 108/471 [1:49:09<6:44:38, 66.88s/it]

23428


 23%|██▎       | 109/471 [1:50:09<6:30:37, 64.75s/it]

23383


 23%|██▎       | 110/471 [1:51:09<6:22:04, 63.50s/it]

23458


 24%|██▎       | 111/471 [1:52:17<6:28:56, 64.82s/it]

23216


 24%|██▍       | 113/471 [1:53:15<4:47:44, 48.23s/it]

23468


 24%|██▍       | 114/471 [1:54:12<5:00:21, 50.48s/it]

23260


 24%|██▍       | 115/471 [1:55:14<5:17:30, 53.51s/it]

23443


 25%|██▍       | 116/471 [1:56:11<5:22:10, 54.45s/it]

23541


 25%|██▍       | 117/471 [1:57:14<5:34:34, 56.71s/it]

23282


 25%|██▌       | 118/471 [1:58:14<5:39:26, 57.70s/it]

23487


 25%|██▌       | 119/471 [1:59:14<5:42:37, 58.40s/it]

23247


 25%|██▌       | 120/471 [2:00:16<5:47:29, 59.40s/it]

23445


 26%|██▌       | 122/471 [2:01:16<4:27:54, 46.06s/it]

23337


 26%|██▌       | 123/471 [2:02:23<4:56:11, 51.07s/it]

23210


 26%|██▋       | 124/471 [2:03:26<5:14:00, 54.30s/it]

23603


 27%|██▋       | 125/471 [2:04:23<5:17:14, 55.01s/it]

23749


 27%|██▋       | 126/471 [2:05:21<5:21:34, 55.93s/it]

23873


 27%|██▋       | 128/471 [2:06:20<4:12:00, 44.08s/it]

23776


 27%|██▋       | 129/471 [2:07:16<4:27:58, 47.01s/it]

23874


 28%|██▊       | 130/471 [2:08:15<4:44:53, 50.13s/it]

23664


 28%|██▊       | 131/471 [2:09:14<4:57:00, 52.41s/it]

23899


 28%|██▊       | 132/471 [2:10:11<5:03:31, 53.72s/it]

23854


 29%|██▊       | 135/471 [2:11:11<3:18:11, 35.39s/it]

23756


 29%|██▉       | 136/471 [2:12:11<3:45:33, 40.40s/it]

23810


 29%|██▉       | 137/471 [2:13:10<4:08:29, 44.64s/it]

23688


 29%|██▉       | 138/471 [2:14:07<4:24:44, 47.70s/it]

23845


 30%|██▉       | 140/471 [2:15:05<3:40:50, 40.03s/it]

23863


 30%|██▉       | 141/471 [2:16:04<4:04:13, 44.40s/it]

23859


 30%|███       | 143/471 [2:17:03<3:30:38, 38.53s/it]

23841


 31%|███       | 144/471 [2:18:08<4:01:58, 44.40s/it]

23833


 31%|███       | 146/471 [2:19:10<3:31:38, 39.07s/it]

23862


 31%|███       | 147/471 [2:20:07<3:52:39, 43.08s/it]

23634


 31%|███▏      | 148/471 [2:21:07<4:12:38, 46.93s/it]

23827


 32%|███▏      | 149/471 [2:22:03<4:24:20, 49.26s/it]

23631


 32%|███▏      | 150/471 [2:23:00<4:35:10, 51.44s/it]

23895


 32%|███▏      | 152/471 [2:23:58<3:41:57, 41.75s/it]

23733


 32%|███▏      | 153/471 [2:24:56<4:01:03, 45.48s/it]

23628


 33%|███▎      | 154/471 [2:26:25<4:58:44, 56.54s/it]

23788


 33%|███▎      | 155/471 [2:27:25<5:01:55, 57.33s/it]

23901


 33%|███▎      | 156/471 [2:28:23<5:01:35, 57.44s/it]

23638


 33%|███▎      | 157/471 [2:29:22<5:03:46, 58.05s/it]

23826


 34%|███▎      | 158/471 [2:30:22<5:05:05, 58.48s/it]

23643


 34%|███▍      | 159/471 [2:31:19<5:02:25, 58.16s/it]

23705


 34%|███▍      | 160/471 [2:32:16<4:59:24, 57.76s/it]

23875


 34%|███▍      | 161/471 [2:33:13<4:57:43, 57.62s/it]

23831


 34%|███▍      | 162/471 [2:34:11<4:56:49, 57.64s/it]

23809


 35%|███▍      | 163/471 [2:35:08<4:54:23, 57.35s/it]

23871


 35%|███▍      | 164/471 [2:36:06<4:54:28, 57.55s/it]

23828


 35%|███▌      | 165/471 [2:37:05<4:56:37, 58.16s/it]

23794


 35%|███▌      | 166/471 [2:38:03<4:55:12, 58.07s/it]

23777


 35%|███▌      | 167/471 [2:39:09<5:06:55, 60.58s/it]

23735


 36%|███▌      | 168/471 [2:40:06<5:00:19, 59.47s/it]

23896


 36%|███▌      | 169/471 [2:41:09<5:04:04, 60.41s/it]

23714


 36%|███▌      | 170/471 [2:42:06<4:58:16, 59.46s/it]

23829


 36%|███▋      | 171/471 [2:43:02<4:52:28, 58.49s/it]

24075


 37%|███▋      | 174/471 [2:44:03<3:03:03, 36.98s/it]

23941


 37%|███▋      | 175/471 [2:45:39<4:04:12, 49.50s/it]

24061


 38%|███▊      | 177/471 [2:46:38<3:25:19, 41.90s/it]

24193


 38%|███▊      | 178/471 [2:47:36<3:41:09, 45.29s/it]

24026


 38%|███▊      | 179/471 [2:48:36<3:57:16, 48.76s/it]

24325


 38%|███▊      | 180/471 [2:49:50<4:27:20, 55.12s/it]

24148


 39%|███▉      | 183/471 [2:50:46<2:53:56, 36.24s/it]

24240


 39%|███▉      | 184/471 [2:51:48<3:17:47, 41.35s/it]

24013


 39%|███▉      | 185/471 [2:52:44<3:32:20, 44.55s/it]

24042


 39%|███▉      | 186/471 [2:53:40<3:44:35, 47.28s/it]

24194


 40%|███▉      | 188/471 [2:54:55<3:24:34, 43.37s/it]

24268


 40%|████      | 189/471 [2:56:00<3:46:28, 48.19s/it]

24005


 40%|████      | 190/471 [2:57:11<4:11:04, 53.61s/it]

24132


 41%|████      | 191/471 [2:58:09<4:15:34, 54.77s/it]

24316


 41%|████      | 192/471 [2:59:30<4:47:57, 61.93s/it]

24153


 41%|████      | 193/471 [3:00:26<4:39:30, 60.33s/it]

24146


 41%|████      | 194/471 [3:01:23<4:34:06, 59.37s/it]

23990


 42%|████▏     | 197/471 [3:02:20<2:49:50, 37.19s/it]

24180


 42%|████▏     | 199/471 [3:03:17<2:35:04, 34.21s/it]

24067


 43%|████▎     | 202/471 [3:04:14<2:04:16, 27.72s/it]

24190


 43%|████▎     | 203/471 [3:05:10<2:25:24, 32.56s/it]

24137


 43%|████▎     | 204/471 [3:06:06<2:45:06, 37.10s/it]

24313


 44%|████▎     | 206/471 [3:07:03<2:30:04, 33.98s/it]

23951


 44%|████▍     | 208/471 [3:08:00<2:20:34, 32.07s/it]

24169


 45%|████▍     | 211/471 [3:08:59<1:56:20, 26.85s/it]

24150


 45%|████▌     | 212/471 [3:10:06<2:25:03, 33.60s/it]

23969


 45%|████▌     | 213/471 [3:11:04<2:44:47, 38.33s/it]

24412


 46%|████▌     | 216/471 [3:12:06<2:08:50, 30.32s/it]

24610


 46%|████▌     | 217/471 [3:13:02<2:27:55, 34.94s/it]

24460


 46%|████▋     | 218/471 [3:14:02<2:48:39, 40.00s/it]

24608


 46%|████▋     | 219/471 [3:14:58<3:02:33, 43.47s/it]

24734


 47%|████▋     | 220/471 [3:15:54<3:14:57, 46.60s/it]

24729


 47%|████▋     | 221/471 [3:16:50<3:23:36, 48.87s/it]

24446


 47%|████▋     | 223/471 [3:17:45<2:44:36, 39.82s/it]

24747


 48%|████▊     | 224/471 [3:18:43<3:01:40, 44.13s/it]

24352


 48%|████▊     | 225/471 [3:19:47<3:21:12, 49.08s/it]

24687


 48%|████▊     | 226/471 [3:20:51<3:36:22, 52.99s/it]

24396


 48%|████▊     | 227/471 [3:21:50<3:41:32, 54.48s/it]

24450


 49%|████▉     | 230/471 [3:22:47<2:22:12, 35.40s/it]

24419


 49%|████▉     | 231/471 [3:23:44<2:39:05, 39.77s/it]

24468


 49%|████▉     | 232/471 [3:24:38<2:52:00, 43.18s/it]

24696


 49%|████▉     | 233/471 [3:26:26<3:53:22, 58.83s/it]

24509


 50%|████▉     | 235/471 [3:27:30<3:08:14, 47.86s/it]

24728


 50%|█████     | 236/471 [3:28:26<3:14:14, 49.60s/it]

24702


 50%|█████     | 237/471 [3:29:24<3:21:41, 51.72s/it]

24500


 51%|█████     | 238/471 [3:30:20<3:25:23, 52.89s/it]

24344


 51%|█████     | 239/471 [3:31:21<3:32:25, 54.94s/it]

24479


 51%|█████     | 240/471 [3:32:17<3:33:13, 55.39s/it]

24614


 51%|█████     | 241/471 [3:33:12<3:31:56, 55.29s/it]

24612


 51%|█████▏    | 242/471 [3:34:08<3:31:34, 55.43s/it]

24497


 52%|█████▏    | 243/471 [3:35:07<3:34:32, 56.46s/it]

24622


 52%|█████▏    | 245/471 [3:36:02<2:42:58, 43.27s/it]

24750


 52%|█████▏    | 246/471 [3:37:00<2:55:52, 46.90s/it]

24488


 52%|█████▏    | 247/471 [3:37:56<3:03:43, 49.21s/it]

24689


 53%|█████▎    | 249/471 [3:38:56<2:32:26, 41.20s/it]

24621


 53%|█████▎    | 250/471 [3:39:52<2:43:44, 44.45s/it]

24738


 53%|█████▎    | 251/471 [3:40:48<2:54:09, 47.50s/it]

24508


 54%|█████▎    | 252/471 [3:41:47<3:03:50, 50.37s/it]

24552


 54%|█████▎    | 253/471 [3:42:44<3:10:00, 52.29s/it]

24705


 54%|█████▍    | 254/471 [3:43:46<3:18:56, 55.01s/it]

24722


 54%|█████▍    | 255/471 [3:44:44<3:20:42, 55.75s/it]

24462


 54%|█████▍    | 256/471 [3:45:42<3:22:47, 56.59s/it]

25024


 55%|█████▍    | 257/471 [3:46:37<3:19:51, 56.04s/it]

24898


 55%|█████▍    | 258/471 [3:47:32<3:18:15, 55.85s/it]

24811


 55%|█████▌    | 261/471 [3:48:32<2:04:49, 35.66s/it]

24763


 56%|█████▌    | 262/471 [3:49:27<2:18:38, 39.80s/it]

24870


 56%|█████▌    | 263/471 [3:50:25<2:32:27, 43.98s/it]

24918


 56%|█████▌    | 264/471 [3:51:22<2:42:43, 47.17s/it]

24848


 56%|█████▋    | 266/471 [3:52:18<2:14:12, 39.28s/it]

24880


 57%|█████▋    | 267/471 [3:53:15<2:27:06, 43.27s/it]

24791


 57%|█████▋    | 268/471 [3:54:10<2:36:27, 46.24s/it]

24790


 57%|█████▋    | 270/471 [3:55:06<2:09:27, 38.64s/it]

24864


 58%|█████▊    | 271/471 [3:56:00<2:20:54, 42.27s/it]

24789


 58%|█████▊    | 273/471 [3:56:55<2:00:22, 36.48s/it]

24891


 58%|█████▊    | 275/471 [3:57:50<1:48:21, 33.17s/it]

24917


 59%|█████▊    | 276/471 [3:58:45<2:02:30, 37.69s/it]

24932


 59%|█████▉    | 278/471 [3:59:41<1:49:36, 34.07s/it]

24905


 59%|█████▉    | 280/471 [4:00:37<1:41:46, 31.97s/it]

25022


 60%|█████▉    | 281/471 [4:01:32<1:55:54, 36.60s/it]

24893


 60%|██████    | 284/471 [4:02:30<1:28:58, 28.55s/it]

25315


 61%|██████    | 285/471 [4:03:25<1:43:20, 33.34s/it]

25552


 61%|██████    | 286/471 [4:04:21<1:57:08, 37.99s/it]

25299


 61%|██████    | 287/471 [4:05:20<2:10:59, 42.72s/it]

25526


 61%|██████▏   | 289/471 [4:06:21<1:55:19, 38.02s/it]

25180


 62%|██████▏   | 290/471 [4:07:17<2:06:24, 41.90s/it]

25079


 62%|██████▏   | 291/471 [4:08:12<2:15:15, 45.09s/it]

25219


 62%|██████▏   | 292/471 [4:09:21<2:32:39, 51.17s/it]

25184


 62%|██████▏   | 294/471 [4:10:18<2:02:39, 41.58s/it]

25231


 63%|██████▎   | 295/471 [4:11:15<2:12:07, 45.04s/it]

25124


 63%|██████▎   | 296/471 [4:12:09<2:18:19, 47.42s/it]

25435


 63%|██████▎   | 297/471 [4:13:15<2:31:37, 52.29s/it]

25572


 63%|██████▎   | 298/471 [4:14:16<2:37:40, 54.69s/it]

25300


 63%|██████▎   | 299/471 [4:15:10<2:36:04, 54.45s/it]

25176


 64%|██████▍   | 301/471 [4:16:08<2:02:15, 43.15s/it]

25322


 64%|██████▍   | 302/471 [4:17:03<2:09:32, 45.99s/it]

25250


 64%|██████▍   | 303/471 [4:17:58<2:15:05, 48.25s/it]

25133


 65%|██████▍   | 304/471 [4:18:52<2:18:40, 49.82s/it]

25285


 65%|██████▍   | 305/471 [4:19:47<2:21:32, 51.16s/it]

25408


 65%|██████▌   | 307/471 [4:20:44<1:52:20, 41.10s/it]

25372


 65%|██████▌   | 308/471 [4:21:42<2:03:09, 45.33s/it]

25194


 66%|██████▌   | 309/471 [4:22:39<2:10:10, 48.21s/it]

25141


 66%|██████▌   | 310/471 [4:23:57<2:30:29, 56.09s/it]

25207


 66%|██████▌   | 311/471 [4:24:51<2:28:35, 55.72s/it]

25520


 66%|██████▌   | 312/471 [4:25:46<2:27:01, 55.48s/it]

25573


 67%|██████▋   | 315/471 [4:26:41<1:31:06, 35.04s/it]

25560


 67%|██████▋   | 316/471 [4:27:35<1:40:31, 38.91s/it]

25279


 68%|██████▊   | 318/471 [4:28:29<1:27:54, 34.48s/it]

25200


 68%|██████▊   | 319/471 [4:29:26<1:39:25, 39.25s/it]

25121


 68%|██████▊   | 320/471 [4:30:20<1:47:35, 42.75s/it]

25325


 68%|██████▊   | 321/471 [4:31:18<1:56:04, 46.43s/it]

25333


 68%|██████▊   | 322/471 [4:32:14<2:01:46, 49.04s/it]

25162


 69%|██████▊   | 323/471 [4:33:09<2:05:03, 50.70s/it]

25264


 69%|██████▉   | 324/471 [4:34:04<2:06:47, 51.75s/it]

25331


 69%|██████▉   | 325/471 [4:34:59<2:08:22, 52.76s/it]

25349


 69%|██████▉   | 326/471 [4:35:55<2:09:24, 53.55s/it]

25130


 70%|██████▉   | 328/471 [4:36:51<1:40:07, 42.01s/it]

25701


 70%|██████▉   | 329/471 [4:37:46<1:46:55, 45.18s/it]

25634


 70%|███████   | 331/471 [4:38:43<1:29:19, 38.28s/it]

25619


 70%|███████   | 332/471 [4:39:41<1:39:04, 42.77s/it]

25632


 71%|███████   | 334/471 [4:40:35<1:23:37, 36.63s/it]

25605


 72%|███████▏  | 338/471 [4:41:28<53:53, 24.31s/it]  

25855


 72%|███████▏  | 339/471 [4:42:21<1:03:32, 28.88s/it]

25614


 72%|███████▏  | 340/471 [4:43:14<1:12:49, 33.35s/it]

25809


 72%|███████▏  | 341/471 [4:44:10<1:22:15, 37.96s/it]

25687


 73%|███████▎  | 342/471 [4:45:05<1:30:11, 41.95s/it]

25766


 73%|███████▎  | 343/471 [4:46:02<1:37:32, 45.73s/it]

25674


 73%|███████▎  | 344/471 [4:46:58<1:42:08, 48.25s/it]

25590


 74%|███████▎  | 347/471 [4:47:57<1:08:31, 33.16s/it]

25705


 74%|███████▍  | 348/471 [4:48:51<1:16:48, 37.46s/it]

25824


 74%|███████▍  | 349/471 [4:49:46<1:23:55, 41.27s/it]

25787


 74%|███████▍  | 350/471 [4:50:40<1:29:22, 44.31s/it]

25630


 75%|███████▍  | 353/471 [4:51:34<1:00:45, 30.89s/it]

25616


 75%|███████▌  | 354/471 [4:52:27<1:08:48, 35.29s/it]

25773


 75%|███████▌  | 355/471 [4:53:21<1:16:12, 39.42s/it]

25613


 76%|███████▌  | 356/471 [4:54:15<1:22:01, 42.79s/it]

25589


 76%|███████▌  | 358/471 [4:55:10<1:08:48, 36.53s/it]

26132


 76%|███████▌  | 359/471 [4:56:20<1:22:11, 44.03s/it]

25931


 76%|███████▋  | 360/471 [4:57:15<1:26:28, 46.75s/it]

26159


 77%|███████▋  | 361/471 [4:58:14<1:31:48, 50.08s/it]

26038


 77%|███████▋  | 365/471 [4:59:08<49:50, 28.21s/it]  

26259


 78%|███████▊  | 366/471 [5:00:10<59:42, 34.12s/it]

26067


 78%|███████▊  | 367/471 [5:01:04<1:06:14, 38.22s/it]

25885


 78%|███████▊  | 368/471 [5:02:26<1:22:06, 47.83s/it]

26074


 78%|███████▊  | 369/471 [5:03:19<1:23:24, 49.06s/it]

26284


 79%|███████▊  | 370/471 [5:04:13<1:24:57, 50.47s/it]

26291


 79%|███████▉  | 371/471 [5:05:06<1:25:17, 51.18s/it]

25940


 79%|███████▉  | 373/471 [5:06:01<1:06:43, 40.86s/it]

26178


 80%|███████▉  | 376/471 [5:06:55<47:17, 29.86s/it]  

26194


 80%|████████  | 377/471 [5:07:55<55:31, 35.44s/it]

26240


 80%|████████  | 379/471 [5:08:49<49:51, 32.52s/it]

26317


 81%|████████  | 380/471 [5:09:52<58:24, 38.51s/it]

26095


 81%|████████  | 382/471 [5:10:48<51:38, 34.81s/it]

26021


 81%|████████▏ | 383/471 [5:11:43<57:07, 38.95s/it]

26024


 82%|████████▏ | 384/471 [5:12:36<1:01:08, 42.17s/it]

25860


 82%|████████▏ | 385/471 [5:13:29<1:04:06, 44.73s/it]

26129


 82%|████████▏ | 386/471 [5:14:30<1:09:13, 48.86s/it]

25991


 82%|████████▏ | 387/471 [5:15:41<1:16:52, 54.91s/it]

26027


 83%|████████▎ | 389/471 [5:16:34<58:05, 42.51s/it]  

26489


 83%|████████▎ | 391/471 [5:17:29<49:02, 36.79s/it]

26589


 83%|████████▎ | 392/471 [5:18:24<53:28, 40.62s/it]

26545


 83%|████████▎ | 393/471 [5:19:16<56:29, 43.45s/it]

26394


 84%|████████▎ | 394/471 [5:20:09<58:41, 45.73s/it]

26560


 85%|████████▍ | 399/471 [5:21:01<27:42, 23.09s/it]

26593


 85%|████████▌ | 401/471 [5:21:55<28:07, 24.11s/it]

26405


 86%|████████▌ | 403/471 [5:22:49<28:18, 24.97s/it]

26514


 86%|████████▌ | 406/471 [5:23:42<24:02, 22.19s/it]

26749


 86%|████████▋ | 407/471 [5:24:40<29:30, 27.66s/it]

26662


 87%|████████▋ | 408/471 [5:25:32<33:31, 31.93s/it]

11436


 87%|████████▋ | 411/471 [5:26:35<27:14, 27.24s/it]

11387


 87%|████████▋ | 412/471 [5:27:28<31:05, 31.62s/it]

11290


 88%|████████▊ | 413/471 [5:28:27<35:43, 36.95s/it]

11314


 88%|████████▊ | 414/471 [5:29:20<38:26, 40.46s/it]

11206


 88%|████████▊ | 415/471 [5:30:18<41:40, 44.64s/it]

11378


 89%|████████▊ | 417/471 [5:31:11<33:38, 37.37s/it]

11205


 89%|████████▉ | 419/471 [5:32:51<36:20, 41.93s/it]

11224


 89%|████████▉ | 421/471 [5:33:43<30:22, 36.44s/it]

26729


 90%|█████████ | 425/471 [5:34:41<19:37, 25.59s/it]

11416


 90%|█████████ | 426/471 [5:35:33<22:10, 29.56s/it]

11277


 91%|█████████ | 427/471 [5:36:56<28:31, 38.90s/it]

11279


 91%|█████████ | 428/471 [5:37:48<29:47, 41.56s/it]

11388


 91%|█████████ | 429/471 [5:38:42<31:04, 44.39s/it]

11447


 91%|█████████▏| 430/471 [5:39:34<31:41, 46.37s/it]

11521


 92%|█████████▏| 431/471 [5:40:27<32:00, 48.00s/it]

11570


 92%|█████████▏| 432/471 [5:41:19<31:51, 49.02s/it]

11854


 92%|█████████▏| 434/471 [5:42:11<24:04, 39.03s/it]

11857


 92%|█████████▏| 435/471 [5:43:03<25:16, 42.12s/it]

11867


 93%|█████████▎| 437/471 [5:43:56<20:20, 35.91s/it]

11835


 93%|█████████▎| 438/471 [5:44:49<21:45, 39.57s/it]

11837


 93%|█████████▎| 439/471 [5:45:41<22:41, 42.53s/it]

11625


 93%|█████████▎| 440/471 [5:46:32<23:11, 44.88s/it]

11742


 94%|█████████▍| 442/471 [5:47:24<17:45, 36.76s/it]

11890


 94%|█████████▍| 443/471 [5:48:15<18:43, 40.13s/it]

11704


 94%|█████████▍| 444/471 [5:49:07<19:26, 43.19s/it]

11882


 95%|█████████▍| 446/471 [5:50:02<15:15, 36.61s/it]

11608


 95%|█████████▍| 447/471 [5:50:54<16:04, 40.17s/it]

11525


 95%|█████████▌| 448/471 [5:51:47<16:35, 43.29s/it]

11720


 95%|█████████▌| 449/471 [5:52:39<16:43, 45.61s/it]

11451


 96%|█████████▌| 451/471 [5:53:36<12:43, 38.19s/it]

12110


 96%|█████████▌| 452/471 [5:54:29<13:11, 41.65s/it]

12106


 96%|█████████▌| 453/471 [5:56:19<17:38, 58.83s/it]

12239


 96%|█████████▋| 454/471 [5:57:11<16:08, 56.95s/it]

11910


 97%|█████████▋| 455/471 [5:58:07<15:06, 56.65s/it]

12056


 97%|█████████▋| 456/471 [5:59:57<17:57, 71.82s/it]

12191


 97%|█████████▋| 457/471 [6:00:55<15:50, 67.88s/it]

12075


 97%|█████████▋| 459/471 [6:01:46<09:46, 48.86s/it]

12256


 98%|█████████▊| 460/471 [6:02:37<09:02, 49.28s/it]

12226


 98%|█████████▊| 461/471 [6:03:27<08:15, 49.54s/it]

12228


 98%|█████████▊| 462/471 [6:04:19<07:30, 50.04s/it]

12078


 98%|█████████▊| 463/471 [6:05:12<06:47, 50.99s/it]

11985


 99%|█████████▊| 464/471 [6:06:04<05:57, 51.08s/it]

12192


 99%|█████████▊| 465/471 [6:07:23<05:55, 59.18s/it]

12042


 99%|█████████▉| 467/471 [6:08:29<03:09, 47.38s/it]

12074


100%|█████████▉| 470/471 [6:09:19<00:32, 32.31s/it]

12281


100%|██████████| 471/471 [6:11:10<00:00, 47.28s/it]


In [151]:
pickle.dump(result_dict_story_gen_emb_tfidf_novel_method_without_diffusion, open('25_result_dict_story_gen_emb_tfidf_novel_method_without_diffusion_v128.p', 'wb'))

In [258]:
# this function recieves a seed and generats the relevant story to that seed without using diffusion component
def story_gen_just_bert_model(seed_, collect_ids, min_acceptable_similarity= 0.90):
    ###SEARCH ALGORITHM         
    min_acceptable_similarity= 0.86
    min_keyword_overlapping= 0.60
    starting_ts = ts_finder(seed_, collect_ids)        
    seed_doc = seed_
    storyline = []
    storyline.append(seed_doc)
    for ts_ids in list(collect_ids.keys())[starting_ts+1:]:
        #preparing conditions and noises for diffusion model
        
        data_ids = [w[0] for w in data_timestamps[ts_ids]]
        seed_embedding = contextual_dict[seed_doc]
        temp = list()
        candidate_docs_id = collect_ids[ts_ids]
        candidate_scores = [cosine_similarity([contextual_dict[w]],[seed_embedding])[0][0] for w in candidate_docs_id]
        candidates = [(candidate_docs_id[i],candidate_scores[i]) for i in range(len(candidate_docs_id)) if candidate_scores[i] > min_acceptable_similarity]
        if len(candidates) < 1:
        #there is no document in the timestamp with the minimum acceptable keyword overlap
            continue
    
        candidate_docs_tf_idf = [keyword_extractor(tf_idf_dict[w[0]], list_of_words_in_tfidf, 100) for w in candidates]
        candidate_scores = [keyword_similarity(w,storyline_keywords) for w in candidate_docs_tf_idf]
        candidate_scores = [w for w in candidate_scores if w > min_keyword_overlapping]
        if len(candidate_scores)<1:
            continue
        
        final_score = [m*z[1] for m,z in zip(candidate_scores, candidates)]
        max_candidate_sim = final_score.index(max(final_score))
        seed_doc = candidates[max_candidate_sim][0]
        
        # add the new pick into the soryline
        storyline.append(seed_doc)
        # # update the storyline_keywords
        
    return storyline

In [259]:
#just bert model
seen_doc = set()
result_dict_story_gen_just_bert_model = dict()
for se in tqdm(seed_documents_for_experiments):
    if se in seen_doc:
        continue
    print(se)
    res = story_gen_just_bert_model(se, collect_ids)
    result_dict_story_gen_just_bert_model[se] = res
    for d in res:
        seen_doc.add(d)

  0%|          | 0/471 [00:00<?, ?it/s]

22989


  0%|          | 1/471 [00:12<1:34:25, 12.05s/it]

23079


  0%|          | 2/471 [00:15<54:52,  7.02s/it]  

23002


  1%|          | 3/471 [00:20<46:56,  6.02s/it]

23124


  1%|          | 4/471 [00:26<45:54,  5.90s/it]

23131


  1%|          | 5/471 [00:29<39:56,  5.14s/it]

23119


  1%|▏         | 6/471 [00:34<39:10,  5.06s/it]

23192


  1%|▏         | 7/471 [00:41<42:56,  5.55s/it]

23066


  2%|▏         | 8/471 [00:47<45:07,  5.85s/it]

22988


  2%|▏         | 9/471 [00:55<49:51,  6.47s/it]

23159


  2%|▏         | 10/471 [01:04<56:15,  7.32s/it]

23107


  2%|▏         | 11/471 [01:11<54:12,  7.07s/it]

23022


  3%|▎         | 12/471 [01:15<48:10,  6.30s/it]

23045


  3%|▎         | 13/471 [01:22<48:40,  6.38s/it]

23110


  3%|▎         | 14/471 [01:31<54:17,  7.13s/it]

23084


  3%|▎         | 15/471 [01:35<46:47,  6.16s/it]

23017


  3%|▎         | 16/471 [01:46<57:32,  7.59s/it]

23036


  4%|▎         | 17/471 [01:56<1:04:45,  8.56s/it]

23023


  4%|▍         | 18/471 [02:03<1:00:59,  8.08s/it]

23005


  4%|▍         | 19/471 [02:07<49:37,  6.59s/it]  

23123


  4%|▍         | 20/471 [02:17<58:50,  7.83s/it]

23061


  4%|▍         | 21/471 [02:28<1:05:07,  8.68s/it]

23121


  5%|▍         | 22/471 [02:39<1:09:33,  9.29s/it]

23073


  5%|▍         | 23/471 [02:44<1:00:36,  8.12s/it]

23020


  5%|▌         | 24/471 [02:54<1:03:48,  8.57s/it]

23163


  5%|▌         | 25/471 [02:57<53:03,  7.14s/it]  

23177


  6%|▌         | 26/471 [03:03<48:46,  6.58s/it]

23077


  6%|▌         | 27/471 [03:07<43:42,  5.91s/it]

23130


  6%|▌         | 28/471 [03:17<51:50,  7.02s/it]

23188


  6%|▌         | 29/471 [03:21<46:44,  6.34s/it]

23054


  6%|▋         | 30/471 [03:32<55:07,  7.50s/it]

22990


  7%|▋         | 31/471 [03:37<50:45,  6.92s/it]

23189


  7%|▋         | 32/471 [03:47<57:30,  7.86s/it]

23115


  7%|▋         | 33/471 [03:55<57:10,  7.83s/it]

23040


  7%|▋         | 34/471 [04:02<55:55,  7.68s/it]

23013


  7%|▋         | 35/471 [04:08<50:46,  6.99s/it]

23185


  8%|▊         | 36/471 [04:17<54:58,  7.58s/it]

23126


  8%|▊         | 37/471 [04:26<57:55,  8.01s/it]

22997


  8%|▊         | 38/471 [04:36<1:03:33,  8.81s/it]

23060


  8%|▊         | 39/471 [04:47<1:06:50,  9.28s/it]

23041


  8%|▊         | 40/471 [04:53<59:42,  8.31s/it]  

22986


  9%|▊         | 41/471 [05:03<1:03:42,  8.89s/it]

23004


  9%|▉         | 42/471 [05:08<55:30,  7.76s/it]  

23137


  9%|▉         | 43/471 [05:14<51:16,  7.19s/it]

23057


  9%|▉         | 44/471 [05:19<47:03,  6.61s/it]

23136


 10%|▉         | 45/471 [05:27<49:30,  6.97s/it]

23068


 10%|▉         | 46/471 [05:37<55:56,  7.90s/it]

23064


 10%|▉         | 47/471 [05:44<52:43,  7.46s/it]

23146


 10%|█         | 48/471 [05:52<54:12,  7.69s/it]

23166


 10%|█         | 49/471 [05:55<43:31,  6.19s/it]

23055


 11%|█         | 50/471 [06:06<55:14,  7.87s/it]

23026


 11%|█         | 51/471 [06:12<51:04,  7.30s/it]

23018


 11%|█         | 52/471 [06:19<50:14,  7.19s/it]

22996


 11%|█▏        | 53/471 [06:29<54:36,  7.84s/it]

22998


 11%|█▏        | 54/471 [06:34<49:36,  7.14s/it]

23006


 12%|█▏        | 55/471 [06:43<54:10,  7.81s/it]

23167


 12%|█▏        | 56/471 [06:54<59:14,  8.56s/it]

22983


 12%|█▏        | 57/471 [07:05<1:05:10,  9.45s/it]

23080


 12%|█▏        | 58/471 [07:14<1:03:43,  9.26s/it]

23031


 13%|█▎        | 59/471 [07:26<1:09:02, 10.06s/it]

23035


 13%|█▎        | 60/471 [07:33<1:01:47,  9.02s/it]

22994


 13%|█▎        | 61/471 [07:38<54:32,  7.98s/it]  

23147


 13%|█▎        | 62/471 [07:44<49:37,  7.28s/it]

23122


 13%|█▎        | 63/471 [07:48<43:12,  6.36s/it]

23104


 14%|█▎        | 64/471 [07:50<35:00,  5.16s/it]

23525


 14%|█▍        | 65/471 [08:02<48:29,  7.17s/it]

23565


 14%|█▍        | 66/471 [08:13<55:26,  8.21s/it]

23489


 14%|█▍        | 67/471 [08:24<1:00:20,  8.96s/it]

23312


 14%|█▍        | 68/471 [08:29<52:57,  7.88s/it]  

23400


 15%|█▍        | 69/471 [08:40<58:08,  8.68s/it]

23344


 15%|█▍        | 70/471 [08:50<1:00:50,  9.10s/it]

23461


 15%|█▌        | 71/471 [09:01<1:04:57,  9.74s/it]

23331


 15%|█▌        | 72/471 [09:08<1:00:04,  9.03s/it]

23197


 15%|█▌        | 73/471 [09:16<56:52,  8.57s/it]  

23298


 16%|█▌        | 74/471 [09:22<51:12,  7.74s/it]

23551


 16%|█▌        | 75/471 [09:28<47:40,  7.22s/it]

23510


 16%|█▌        | 76/471 [09:37<52:03,  7.91s/it]

23481


 16%|█▋        | 77/471 [09:47<55:23,  8.43s/it]

23548


 17%|█▋        | 78/471 [09:52<49:22,  7.54s/it]

23466


 17%|█▋        | 79/471 [09:56<41:29,  6.35s/it]

23474


 17%|█▋        | 80/471 [10:07<50:49,  7.80s/it]

23553


 17%|█▋        | 81/471 [10:13<47:00,  7.23s/it]

23431


 17%|█▋        | 82/471 [10:15<37:04,  5.72s/it]

23359


 18%|█▊        | 83/471 [10:25<45:33,  7.04s/it]

23206


 18%|█▊        | 84/471 [10:34<49:17,  7.64s/it]

23201


 18%|█▊        | 85/471 [10:44<53:42,  8.35s/it]

23395


 18%|█▊        | 86/471 [10:50<47:47,  7.45s/it]

23291


 18%|█▊        | 87/471 [10:54<41:27,  6.48s/it]

23569


 19%|█▊        | 88/471 [10:58<37:17,  5.84s/it]

23624


 19%|█▉        | 89/471 [11:05<38:18,  6.02s/it]

23511


 19%|█▉        | 90/471 [11:15<46:52,  7.38s/it]

23296


 19%|█▉        | 91/471 [11:19<39:32,  6.24s/it]

23397


 20%|█▉        | 92/471 [11:29<47:20,  7.50s/it]

23248


 20%|█▉        | 93/471 [11:40<54:25,  8.64s/it]

23570


 20%|█▉        | 94/471 [11:42<41:48,  6.65s/it]

23442


 20%|██        | 95/471 [11:50<43:13,  6.90s/it]

23460


 20%|██        | 96/471 [11:59<47:56,  7.67s/it]

23193


 21%|██        | 97/471 [12:05<43:54,  7.04s/it]

23608


 21%|██        | 98/471 [12:15<49:05,  7.90s/it]

23199


 21%|██        | 99/471 [12:26<55:44,  8.99s/it]

23550


 21%|██        | 100/471 [12:32<49:22,  7.98s/it]

23417


 21%|██▏       | 101/471 [12:37<42:56,  6.96s/it]

23266


 22%|██▏       | 102/471 [12:40<35:55,  5.84s/it]

23480


 22%|██▏       | 103/471 [12:48<40:51,  6.66s/it]

23576


 22%|██▏       | 104/471 [12:59<47:49,  7.82s/it]

23613


 22%|██▏       | 105/471 [13:05<44:32,  7.30s/it]

23581


 23%|██▎       | 106/471 [13:15<49:21,  8.11s/it]

23313


 23%|██▎       | 107/471 [13:20<44:20,  7.31s/it]

23618


 23%|██▎       | 108/471 [13:31<50:12,  8.30s/it]

23428


 23%|██▎       | 109/471 [13:38<46:51,  7.77s/it]

23383


 23%|██▎       | 110/471 [13:50<54:25,  9.04s/it]

23458


 24%|██▎       | 111/471 [14:00<56:56,  9.49s/it]

23401


 24%|██▍       | 112/471 [14:10<57:03,  9.54s/it]

23216


 24%|██▍       | 113/471 [14:14<47:09,  7.90s/it]

23468


 24%|██▍       | 114/471 [14:20<44:01,  7.40s/it]

23260


 24%|██▍       | 115/471 [14:30<49:01,  8.26s/it]

23443


 25%|██▍       | 116/471 [14:40<51:18,  8.67s/it]

23541


 25%|██▍       | 117/471 [14:45<45:18,  7.68s/it]

23282


 25%|██▌       | 118/471 [14:53<45:05,  7.66s/it]

23487


 25%|██▌       | 119/471 [15:05<52:06,  8.88s/it]

23247


 25%|██▌       | 120/471 [15:15<53:58,  9.23s/it]

23252


 26%|██▌       | 121/471 [15:22<51:01,  8.75s/it]

23445


 26%|██▌       | 122/471 [15:27<44:01,  7.57s/it]

23337


 26%|██▌       | 123/471 [15:37<47:56,  8.27s/it]

23210


 26%|██▋       | 124/471 [15:48<51:53,  8.97s/it]

23603


 27%|██▋       | 125/471 [15:57<52:10,  9.05s/it]

23749


 27%|██▋       | 126/471 [16:02<45:56,  7.99s/it]

23723


 27%|██▋       | 127/471 [16:06<37:17,  6.50s/it]

23873


 27%|██▋       | 128/471 [16:13<38:58,  6.82s/it]

23776


 27%|██▋       | 129/471 [16:15<30:32,  5.36s/it]

23874


 28%|██▊       | 130/471 [16:20<29:48,  5.24s/it]

23664


 28%|██▊       | 131/471 [16:25<29:53,  5.27s/it]

23899


 28%|██▊       | 132/471 [16:35<36:51,  6.52s/it]

23887


 28%|██▊       | 133/471 [16:46<45:04,  8.00s/it]

23837


 28%|██▊       | 134/471 [16:51<39:53,  7.10s/it]

23854


 29%|██▊       | 135/471 [17:01<43:51,  7.83s/it]

23756


 29%|██▉       | 136/471 [17:11<47:03,  8.43s/it]

23810


 29%|██▉       | 137/471 [17:21<50:08,  9.01s/it]

23688


 29%|██▉       | 138/471 [17:26<43:45,  7.89s/it]

23902


 30%|██▉       | 139/471 [17:35<45:53,  8.29s/it]

23845


 30%|██▉       | 140/471 [17:45<48:20,  8.76s/it]

23863


 30%|██▉       | 141/471 [17:54<47:57,  8.72s/it]

23721


 30%|███       | 142/471 [18:04<49:58,  9.12s/it]

23859


 30%|███       | 143/471 [18:14<51:41,  9.45s/it]

23841


 31%|███       | 144/471 [18:17<40:36,  7.45s/it]

23625


 31%|███       | 145/471 [18:27<44:17,  8.15s/it]

23833


 31%|███       | 146/471 [18:33<41:00,  7.57s/it]

23862


 31%|███       | 147/471 [18:41<41:24,  7.67s/it]

23634


 31%|███▏      | 148/471 [18:52<47:37,  8.85s/it]

23827


 32%|███▏      | 149/471 [19:02<48:12,  8.98s/it]

23631


 32%|███▏      | 150/471 [19:11<49:03,  9.17s/it]

23916


 32%|███▏      | 151/471 [19:17<42:59,  8.06s/it]

23895


 32%|███▏      | 152/471 [19:21<37:09,  6.99s/it]

23733


 32%|███▏      | 153/471 [19:23<29:14,  5.52s/it]

23628


 33%|███▎      | 154/471 [19:29<28:42,  5.43s/it]

23788


 33%|███▎      | 155/471 [19:31<24:25,  4.64s/it]

23901


 33%|███▎      | 156/471 [19:38<27:10,  5.18s/it]

23638


 33%|███▎      | 157/471 [19:45<30:50,  5.89s/it]

23826


 34%|███▎      | 158/471 [19:52<31:23,  6.02s/it]

23643


 34%|███▍      | 159/471 [19:54<25:22,  4.88s/it]

23705


 34%|███▍      | 160/471 [20:01<28:53,  5.58s/it]

23875


 34%|███▍      | 161/471 [20:10<33:07,  6.41s/it]

23831


 34%|███▍      | 162/471 [20:21<41:00,  7.96s/it]

23809


 35%|███▍      | 163/471 [20:23<31:37,  6.16s/it]

23871


 35%|███▍      | 164/471 [20:26<26:28,  5.18s/it]

23828


 35%|███▌      | 165/471 [20:30<24:56,  4.89s/it]

23794


 35%|███▌      | 166/471 [20:33<21:00,  4.13s/it]

23777


 35%|███▌      | 167/471 [20:42<28:43,  5.67s/it]

23735


 36%|███▌      | 168/471 [20:48<29:01,  5.75s/it]

23896


 36%|███▌      | 169/471 [20:51<25:43,  5.11s/it]

23714


 36%|███▌      | 170/471 [21:01<32:41,  6.52s/it]

23829


 36%|███▋      | 171/471 [21:06<29:27,  5.89s/it]

23941


 37%|███▋      | 175/471 [21:15<18:13,  3.69s/it]

24125


 37%|███▋      | 176/471 [21:25<23:23,  4.76s/it]

24193


 38%|███▊      | 178/471 [21:33<21:57,  4.50s/it]

24026


 38%|███▊      | 179/471 [21:39<23:25,  4.81s/it]

24325


 38%|███▊      | 180/471 [21:48<28:05,  5.79s/it]

24303


 38%|███▊      | 181/471 [21:53<27:21,  5.66s/it]

24239


 39%|███▊      | 182/471 [22:03<32:36,  6.77s/it]

24148


 39%|███▉      | 183/471 [22:13<35:52,  7.47s/it]

24240


 39%|███▉      | 184/471 [22:20<34:52,  7.29s/it]

24013


 39%|███▉      | 185/471 [22:29<37:23,  7.85s/it]

24042


 39%|███▉      | 186/471 [22:36<36:08,  7.61s/it]

24329


 40%|███▉      | 187/471 [22:45<38:25,  8.12s/it]

24194


 40%|███▉      | 188/471 [22:55<40:29,  8.59s/it]

24268


 40%|████      | 189/471 [23:06<44:23,  9.45s/it]

24005


 40%|████      | 190/471 [23:10<36:01,  7.69s/it]

24132


 41%|████      | 191/471 [23:14<30:40,  6.57s/it]

24316


 41%|████      | 192/471 [23:23<34:05,  7.33s/it]

24153


 41%|████      | 193/471 [23:28<31:03,  6.70s/it]

24146


 41%|████      | 194/471 [23:34<29:42,  6.43s/it]

24338


 41%|████▏     | 195/471 [23:41<30:26,  6.62s/it]

24269


 42%|████▏     | 196/471 [23:50<33:28,  7.30s/it]

23990


 42%|████▏     | 197/471 [23:54<29:08,  6.38s/it]

23952


 42%|████▏     | 198/471 [23:59<26:22,  5.80s/it]

24180


 42%|████▏     | 199/471 [24:05<27:41,  6.11s/it]

24088


 42%|████▏     | 200/471 [24:12<28:47,  6.38s/it]

23954


 43%|████▎     | 201/471 [24:22<33:18,  7.40s/it]

24067


 43%|████▎     | 202/471 [24:26<27:59,  6.24s/it]

24190


 43%|████▎     | 203/471 [24:29<23:39,  5.30s/it]

24137


 43%|████▎     | 204/471 [24:35<25:18,  5.69s/it]

24271


 44%|████▎     | 205/471 [24:40<23:00,  5.19s/it]

24313


 44%|████▎     | 206/471 [24:45<23:02,  5.22s/it]

24158


 44%|████▍     | 207/471 [24:49<21:54,  4.98s/it]

23951


 44%|████▍     | 208/471 [24:52<19:30,  4.45s/it]

24219


 44%|████▍     | 209/471 [24:57<19:56,  4.57s/it]

23975


 45%|████▍     | 210/471 [25:06<25:00,  5.75s/it]

24169


 45%|████▍     | 211/471 [25:17<31:55,  7.37s/it]

24150


 45%|████▌     | 212/471 [25:19<25:28,  5.90s/it]

23969


 45%|████▌     | 213/471 [25:31<32:39,  7.59s/it]

24000


 45%|████▌     | 214/471 [25:36<29:52,  6.98s/it]

24633


 46%|████▌     | 215/471 [25:41<26:19,  6.17s/it]

24412


 46%|████▌     | 216/471 [25:52<32:31,  7.65s/it]

24610


 46%|████▌     | 217/471 [25:56<27:51,  6.58s/it]

24460


 46%|████▋     | 218/471 [26:03<28:05,  6.66s/it]

24608


 46%|████▋     | 219/471 [26:08<26:23,  6.28s/it]

24734


 47%|████▋     | 220/471 [26:15<27:13,  6.51s/it]

24729


 47%|████▋     | 221/471 [26:19<23:48,  5.72s/it]

24376


 47%|████▋     | 222/471 [26:27<26:31,  6.39s/it]

24446


 47%|████▋     | 223/471 [26:38<31:38,  7.65s/it]

24747


 48%|████▊     | 224/471 [26:46<31:51,  7.74s/it]

24352


 48%|████▊     | 225/471 [26:54<32:49,  8.01s/it]

24687


 48%|████▊     | 226/471 [27:03<33:03,  8.09s/it]

24396


 48%|████▊     | 227/471 [27:14<36:45,  9.04s/it]

24374


 48%|████▊     | 228/471 [27:23<37:13,  9.19s/it]

24361


 49%|████▊     | 229/471 [27:32<36:17,  9.00s/it]

24450


 49%|████▉     | 230/471 [27:37<31:49,  7.92s/it]

24419


 49%|████▉     | 231/471 [27:39<24:36,  6.15s/it]

24468


 49%|████▉     | 232/471 [27:42<20:11,  5.07s/it]

24696


 49%|████▉     | 233/471 [27:53<27:45,  7.00s/it]

24415


 50%|████▉     | 234/471 [27:59<25:33,  6.47s/it]

24509


 50%|████▉     | 235/471 [28:10<30:57,  7.87s/it]

24728


 50%|█████     | 236/471 [28:12<24:20,  6.22s/it]

24702


 50%|█████     | 237/471 [28:21<27:36,  7.08s/it]

24500


 51%|█████     | 238/471 [28:31<30:51,  7.94s/it]

24344


 51%|█████     | 239/471 [28:34<25:14,  6.53s/it]

24479


 51%|█████     | 240/471 [28:37<21:03,  5.47s/it]

24614


 51%|█████     | 241/471 [28:43<20:36,  5.38s/it]

24612


 51%|█████▏    | 242/471 [28:54<27:03,  7.09s/it]

24497


 52%|█████▏    | 243/471 [29:03<29:43,  7.82s/it]

24670


 52%|█████▏    | 244/471 [29:09<27:03,  7.15s/it]

24622


 52%|█████▏    | 245/471 [29:14<24:38,  6.54s/it]

24750


 52%|█████▏    | 246/471 [29:18<21:43,  5.80s/it]

24488


 52%|█████▏    | 247/471 [29:24<21:36,  5.79s/it]

24755


 53%|█████▎    | 248/471 [29:33<25:26,  6.85s/it]

24689


 53%|█████▎    | 249/471 [29:43<28:36,  7.73s/it]

24621


 53%|█████▎    | 250/471 [29:54<32:31,  8.83s/it]

24738


 53%|█████▎    | 251/471 [30:01<30:40,  8.36s/it]

24508


 54%|█████▎    | 252/471 [30:13<33:35,  9.20s/it]

24552


 54%|█████▎    | 253/471 [30:21<32:17,  8.89s/it]

24705


 54%|█████▍    | 254/471 [30:28<30:25,  8.41s/it]

24722


 54%|█████▍    | 255/471 [30:37<31:05,  8.64s/it]

24462


 54%|█████▍    | 256/471 [30:46<31:29,  8.79s/it]

25024


 55%|█████▍    | 257/471 [30:49<24:21,  6.83s/it]

24898


 55%|█████▍    | 258/471 [31:00<29:12,  8.23s/it]

24770


 55%|█████▍    | 259/471 [31:05<25:50,  7.32s/it]

24884


 55%|█████▌    | 260/471 [31:09<21:40,  6.16s/it]

24811


 55%|█████▌    | 261/471 [31:13<19:45,  5.65s/it]

24763


 56%|█████▌    | 262/471 [31:16<16:39,  4.78s/it]

24870


 56%|█████▌    | 263/471 [31:27<23:20,  6.73s/it]

24918


 56%|█████▌    | 264/471 [31:33<22:27,  6.51s/it]

24979


 56%|█████▋    | 265/471 [31:40<22:05,  6.43s/it]

24848


 56%|█████▋    | 266/471 [31:45<20:41,  6.06s/it]

24880


 57%|█████▋    | 267/471 [31:49<19:08,  5.63s/it]

24791


 57%|█████▋    | 268/471 [31:55<19:17,  5.70s/it]

24841


 57%|█████▋    | 269/471 [32:07<25:03,  7.44s/it]

24790


 57%|█████▋    | 270/471 [32:12<22:31,  6.72s/it]

24864


 58%|█████▊    | 271/471 [32:15<19:09,  5.75s/it]

24804


 58%|█████▊    | 272/471 [32:19<16:41,  5.03s/it]

24789


 58%|█████▊    | 273/471 [32:29<22:01,  6.67s/it]

24885


 58%|█████▊    | 274/471 [32:36<21:49,  6.65s/it]

24891


 58%|█████▊    | 275/471 [32:38<17:41,  5.41s/it]

24917


 59%|█████▊    | 276/471 [32:50<23:20,  7.18s/it]

24915


 59%|█████▉    | 277/471 [32:59<25:43,  7.96s/it]

24932


 59%|█████▉    | 278/471 [33:02<20:04,  6.24s/it]

25036


 59%|█████▉    | 279/471 [33:12<23:37,  7.38s/it]

24905


 59%|█████▉    | 280/471 [33:14<18:26,  5.79s/it]

25022


 60%|█████▉    | 281/471 [33:19<17:28,  5.52s/it]

24824


 60%|█████▉    | 282/471 [33:24<17:33,  5.57s/it]

24867


 60%|██████    | 283/471 [33:32<19:23,  6.19s/it]

24893


 60%|██████    | 284/471 [33:41<22:24,  7.19s/it]

25315


 61%|██████    | 285/471 [33:46<19:41,  6.35s/it]

25552


 61%|██████    | 286/471 [33:53<20:16,  6.58s/it]

25299


 61%|██████    | 287/471 [34:03<22:58,  7.49s/it]

25197


 61%|██████    | 288/471 [34:11<24:07,  7.91s/it]

25526


 61%|██████▏   | 289/471 [34:21<25:11,  8.31s/it]

25180


 62%|██████▏   | 290/471 [34:29<24:41,  8.18s/it]

25079


 62%|██████▏   | 291/471 [34:38<25:38,  8.55s/it]

25219


 62%|██████▏   | 292/471 [34:47<26:07,  8.76s/it]

25135


 62%|██████▏   | 293/471 [34:52<22:45,  7.67s/it]

25184


 62%|██████▏   | 294/471 [35:02<24:04,  8.16s/it]

25231


 63%|██████▎   | 295/471 [35:09<23:36,  8.05s/it]

25124


 63%|██████▎   | 296/471 [35:14<20:10,  6.92s/it]

25435


 63%|██████▎   | 297/471 [35:19<18:49,  6.49s/it]

25572


 63%|██████▎   | 298/471 [35:28<21:00,  7.28s/it]

25300


 63%|██████▎   | 299/471 [35:31<17:10,  5.99s/it]

25122


 64%|██████▎   | 300/471 [35:38<17:24,  6.11s/it]

25176


 64%|██████▍   | 301/471 [35:47<20:12,  7.13s/it]

25322


 64%|██████▍   | 302/471 [35:50<16:06,  5.72s/it]

25250


 64%|██████▍   | 303/471 [35:57<17:04,  6.10s/it]

25133


 65%|██████▍   | 304/471 [36:06<19:36,  7.04s/it]

25285


 65%|██████▍   | 305/471 [36:09<16:08,  5.84s/it]

25177


 65%|██████▍   | 306/471 [36:14<15:50,  5.76s/it]

25408


 65%|██████▌   | 307/471 [36:24<18:40,  6.83s/it]

25372


 65%|██████▌   | 308/471 [36:26<14:42,  5.41s/it]

25194


 66%|██████▌   | 309/471 [36:29<12:44,  4.72s/it]

25141


 66%|██████▌   | 310/471 [36:35<13:28,  5.02s/it]

25207


 66%|██████▌   | 311/471 [36:37<11:33,  4.34s/it]

25520


 66%|██████▌   | 312/471 [36:45<13:44,  5.19s/it]

25144


 66%|██████▋   | 313/471 [36:56<18:22,  6.98s/it]

25414


 67%|██████▋   | 314/471 [36:58<14:23,  5.50s/it]

25573


 67%|██████▋   | 315/471 [37:09<18:53,  7.27s/it]

25560


 67%|██████▋   | 316/471 [37:12<14:57,  5.79s/it]

25508


 67%|██████▋   | 317/471 [37:19<15:47,  6.15s/it]

25279


 68%|██████▊   | 318/471 [37:23<14:23,  5.64s/it]

25200


 68%|██████▊   | 319/471 [37:31<16:03,  6.34s/it]

25121


 68%|██████▊   | 320/471 [37:38<16:30,  6.56s/it]

25325


 68%|██████▊   | 321/471 [37:42<14:36,  5.84s/it]

25333


 68%|██████▊   | 322/471 [37:49<14:53,  5.99s/it]

25162


 69%|██████▊   | 323/471 [37:52<12:58,  5.26s/it]

25264


 69%|██████▉   | 324/471 [37:55<10:50,  4.42s/it]

25331


 69%|██████▉   | 325/471 [38:02<12:54,  5.30s/it]

25349


 69%|██████▉   | 326/471 [38:13<17:01,  7.05s/it]

25418


 69%|██████▉   | 327/471 [38:24<19:45,  8.23s/it]

25130


 70%|██████▉   | 328/471 [38:29<17:38,  7.40s/it]

25701


 70%|██████▉   | 329/471 [38:34<15:14,  6.44s/it]

25856


 70%|███████   | 330/471 [38:39<14:39,  6.24s/it]

25634


 70%|███████   | 331/471 [38:44<13:26,  5.76s/it]

25619


 70%|███████   | 332/471 [38:52<14:54,  6.44s/it]

25710


 71%|███████   | 333/471 [38:55<12:33,  5.46s/it]

25632


 71%|███████   | 334/471 [39:02<13:17,  5.82s/it]

25631


 71%|███████   | 335/471 [39:05<11:03,  4.88s/it]

25847


 71%|███████▏  | 336/471 [39:08<09:59,  4.44s/it]

25610


 72%|███████▏  | 337/471 [39:13<10:19,  4.62s/it]

25605


 72%|███████▏  | 338/471 [39:20<11:49,  5.33s/it]

25855


 72%|███████▏  | 339/471 [39:29<14:20,  6.52s/it]

25614


 72%|███████▏  | 340/471 [39:32<11:57,  5.48s/it]

25809


 72%|███████▏  | 341/471 [39:42<14:19,  6.61s/it]

25687


 73%|███████▎  | 342/471 [39:51<15:57,  7.42s/it]

25766


 73%|███████▎  | 343/471 [40:01<17:41,  8.29s/it]

25674


 73%|███████▎  | 344/471 [40:12<19:11,  9.06s/it]

25802


 73%|███████▎  | 345/471 [40:14<14:46,  7.03s/it]

25671


 73%|███████▎  | 346/471 [40:23<15:37,  7.50s/it]

25590


 74%|███████▎  | 347/471 [40:33<16:45,  8.11s/it]

25705


 74%|███████▍  | 348/471 [40:41<16:30,  8.06s/it]

25824


 74%|███████▍  | 349/471 [40:48<15:48,  7.77s/it]

25787


 74%|███████▍  | 350/471 [40:55<15:09,  7.52s/it]

25833


 75%|███████▍  | 351/471 [40:57<12:06,  6.06s/it]

25857


 75%|███████▍  | 352/471 [41:07<14:17,  7.20s/it]

25630


 75%|███████▍  | 353/471 [41:12<12:58,  6.60s/it]

25616


 75%|███████▌  | 354/471 [41:15<10:42,  5.49s/it]

25773


 75%|███████▌  | 355/471 [41:26<13:48,  7.14s/it]

25613


 76%|███████▌  | 356/471 [41:32<12:57,  6.76s/it]

25654


 76%|███████▌  | 357/471 [41:41<14:01,  7.38s/it]

25589


 76%|███████▌  | 358/471 [41:50<14:54,  7.92s/it]

26132


 76%|███████▌  | 359/471 [41:57<14:14,  7.63s/it]

25931


 76%|███████▋  | 360/471 [42:06<14:59,  8.11s/it]

26159


 77%|███████▋  | 361/471 [42:12<13:33,  7.40s/it]

26331


 77%|███████▋  | 362/471 [42:21<14:19,  7.89s/it]

26136


 77%|███████▋  | 363/471 [42:26<12:31,  6.95s/it]

26160


 77%|███████▋  | 364/471 [42:32<12:04,  6.78s/it]

26038


 77%|███████▋  | 365/471 [42:38<11:23,  6.45s/it]

26259


 78%|███████▊  | 366/471 [42:44<11:00,  6.29s/it]

26067


 78%|███████▊  | 367/471 [42:54<13:12,  7.62s/it]

25885


 78%|███████▊  | 368/471 [42:57<10:37,  6.19s/it]

26074


 78%|███████▊  | 369/471 [43:00<08:31,  5.01s/it]

26284


 79%|███████▊  | 370/471 [43:07<09:41,  5.76s/it]

26291


 79%|███████▉  | 371/471 [43:10<07:56,  4.76s/it]

26002


 79%|███████▉  | 372/471 [43:17<09:08,  5.54s/it]

25940


 79%|███████▉  | 373/471 [43:24<09:48,  6.01s/it]

25957


 79%|███████▉  | 374/471 [43:31<10:03,  6.22s/it]

26012


 80%|███████▉  | 375/471 [43:33<07:58,  4.98s/it]

26178


 80%|███████▉  | 376/471 [43:35<06:41,  4.23s/it]

26194


 80%|████████  | 377/471 [43:44<08:52,  5.66s/it]

26163


 80%|████████  | 378/471 [43:47<07:32,  4.86s/it]

26240


 80%|████████  | 379/471 [43:49<06:12,  4.05s/it]

26317


 81%|████████  | 380/471 [43:58<08:24,  5.54s/it]

26174


 81%|████████  | 381/471 [44:04<08:08,  5.43s/it]

26095


 81%|████████  | 382/471 [44:11<08:43,  5.89s/it]

26021


 81%|████████▏ | 383/471 [44:21<10:49,  7.38s/it]

26024


 82%|████████▏ | 384/471 [44:28<10:16,  7.09s/it]

25860


 82%|████████▏ | 385/471 [44:31<08:28,  5.91s/it]

26129


 82%|████████▏ | 386/471 [44:37<08:12,  5.80s/it]

25991


 82%|████████▏ | 387/471 [44:39<06:51,  4.90s/it]

26084


 82%|████████▏ | 388/471 [44:45<07:06,  5.13s/it]

26027


 83%|████████▎ | 389/471 [44:47<05:36,  4.11s/it]

26545


 83%|████████▎ | 393/471 [44:53<03:16,  2.51s/it]

26394


 84%|████████▎ | 394/471 [45:05<05:22,  4.19s/it]

26526


 84%|████████▍ | 395/471 [45:12<06:11,  4.89s/it]

26409


 84%|████████▍ | 396/471 [45:21<07:15,  5.81s/it]

26437


 84%|████████▍ | 397/471 [45:25<06:42,  5.45s/it]

26411


 85%|████████▍ | 398/471 [45:35<08:01,  6.60s/it]

26560


 85%|████████▍ | 399/471 [45:40<07:15,  6.04s/it]

26601


 85%|████████▍ | 400/471 [45:49<08:14,  6.96s/it]

26593


 85%|████████▌ | 401/471 [45:52<06:52,  5.89s/it]

26532


 85%|████████▌ | 402/471 [45:59<06:51,  5.96s/it]

26405


 86%|████████▌ | 403/471 [46:07<07:35,  6.70s/it]

26585


 86%|████████▌ | 404/471 [46:10<06:12,  5.56s/it]

26491


 86%|████████▌ | 405/471 [46:20<07:36,  6.91s/it]

26514


 86%|████████▌ | 406/471 [46:28<07:52,  7.26s/it]

26749


 86%|████████▋ | 407/471 [46:34<07:13,  6.77s/it]

26662


 87%|████████▋ | 408/471 [46:39<06:44,  6.41s/it]

11427


 87%|████████▋ | 409/471 [46:48<07:19,  7.10s/it]

26685


 87%|████████▋ | 410/471 [46:54<06:45,  6.65s/it]

11436


 87%|████████▋ | 411/471 [47:01<06:51,  6.85s/it]

11387


 87%|████████▋ | 412/471 [47:06<06:11,  6.30s/it]

11290


 88%|████████▊ | 413/471 [47:12<06:07,  6.33s/it]

11314


 88%|████████▊ | 414/471 [47:15<04:51,  5.11s/it]

11206


 88%|████████▊ | 415/471 [47:16<03:49,  4.09s/it]

26738


 88%|████████▊ | 416/471 [47:25<05:05,  5.55s/it]

11378


 89%|████████▊ | 417/471 [47:34<05:56,  6.61s/it]

11219


 89%|████████▊ | 418/471 [47:37<04:44,  5.37s/it]

11205


 89%|████████▉ | 419/471 [47:46<05:33,  6.41s/it]

11216


 89%|████████▉ | 420/471 [47:48<04:29,  5.29s/it]

11224


 89%|████████▉ | 421/471 [47:53<04:19,  5.19s/it]

26649


 90%|████████▉ | 422/471 [47:58<04:11,  5.13s/it]

11325


 90%|████████▉ | 423/471 [48:05<04:32,  5.67s/it]

11283


 90%|█████████ | 424/471 [48:08<03:45,  4.79s/it]

26729


 90%|█████████ | 425/471 [48:10<02:58,  3.89s/it]

11416


 90%|█████████ | 426/471 [48:15<03:18,  4.40s/it]

11277


 91%|█████████ | 427/471 [48:26<04:34,  6.24s/it]

11279


 91%|█████████ | 428/471 [48:35<05:02,  7.04s/it]

11388


 91%|█████████ | 429/471 [48:40<04:37,  6.61s/it]

11521


 92%|█████████▏| 431/471 [48:43<02:42,  4.06s/it]

11854


 92%|█████████▏| 434/471 [48:53<02:17,  3.72s/it]

11867


 93%|█████████▎| 437/471 [49:01<01:52,  3.32s/it]

11835


 93%|█████████▎| 438/471 [49:06<01:59,  3.61s/it]

11837


 93%|█████████▎| 439/471 [49:12<02:11,  4.11s/it]

11625


 93%|█████████▎| 440/471 [49:16<02:05,  4.06s/it]

11891


 94%|█████████▎| 441/471 [49:27<02:48,  5.61s/it]

11742


 94%|█████████▍| 442/471 [49:33<02:44,  5.68s/it]

11890


 94%|█████████▍| 443/471 [49:38<02:37,  5.64s/it]

11704


 94%|█████████▍| 444/471 [49:46<02:46,  6.16s/it]

11712


 94%|█████████▍| 445/471 [49:48<02:12,  5.11s/it]

11882


 95%|█████████▍| 446/471 [49:57<02:36,  6.25s/it]

11608


 95%|█████████▍| 447/471 [50:00<02:03,  5.17s/it]

11525


 95%|█████████▌| 448/471 [50:04<01:55,  5.00s/it]

11720


 95%|█████████▌| 449/471 [50:14<02:18,  6.29s/it]

11851


 96%|█████████▌| 450/471 [50:19<02:03,  5.89s/it]

11451


 96%|█████████▌| 451/471 [50:24<01:54,  5.71s/it]

12110


 96%|█████████▌| 452/471 [50:34<02:10,  6.85s/it]

12106


 96%|█████████▌| 453/471 [50:37<01:44,  5.79s/it]

12239


 96%|█████████▋| 454/471 [50:42<01:35,  5.64s/it]

11910


 97%|█████████▋| 455/471 [50:47<01:26,  5.42s/it]

12056


 97%|█████████▋| 456/471 [50:50<01:11,  4.79s/it]

12191


 97%|█████████▋| 457/471 [50:55<01:07,  4.82s/it]

12082


 97%|█████████▋| 458/471 [51:05<01:22,  6.36s/it]

12075


 97%|█████████▋| 459/471 [51:14<01:25,  7.13s/it]

12256


 98%|█████████▊| 460/471 [51:19<01:09,  6.36s/it]

12226


 98%|█████████▊| 461/471 [51:25<01:02,  6.22s/it]

12228


 98%|█████████▊| 462/471 [51:29<00:51,  5.71s/it]

12078


 98%|█████████▊| 463/471 [51:35<00:45,  5.66s/it]

11985


 99%|█████████▊| 464/471 [51:40<00:39,  5.59s/it]

12192


 99%|█████████▊| 465/471 [51:48<00:38,  6.43s/it]

12087


 99%|█████████▉| 466/471 [51:58<00:36,  7.40s/it]

12042


 99%|█████████▉| 467/471 [52:01<00:24,  6.17s/it]

12253


 99%|█████████▉| 468/471 [52:10<00:20,  6.92s/it]

12029


100%|█████████▉| 469/471 [52:16<00:13,  6.62s/it]

12074


100%|█████████▉| 470/471 [52:22<00:06,  6.44s/it]

12281


100%|██████████| 471/471 [52:25<00:00,  6.68s/it]


In [287]:
# length = [len(result_dict_story_gen_just_bert_model[w]) for w in result_dict_story_gen_just_bert_model]
# print(*length, sep=', ')

In [262]:
pickle.dump(result_dict_story_gen_just_bert_model, open('25_result_dict_story_gen_just_bert_model_v128.p', 'wb'))

In [263]:
#doc2vec algorithm:
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfTransformer
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.stem import PorterStemmer
from nltk.tokenize import sent_tokenize, word_tokenize

stopwords = ['i','me','my','myself','we','our','ours','ourselves','you','your','yours','yourself','yourselves','he','him','his','himself','she','her','hers','herself','it','its','itself','they','them','their','theirs','themselves','what','which','who','whom','this','that','these','those','am','is','are','was','were','be','been','being','have','has','had','having','do','does','did','doing','a','an','the','and','but','if','or','because','as','until','while','of','at','by','for','with','about','against','between','into','through','during','before','after','above','below','to','from','up','down','in','out','on','off','over','under','again','further','then','once','here','there','when','where','why','how','all','any','both','each','few','more','most','other','some','such','no','nor','not','only','own','same','so','than','too','very','s','t','can','will','just','don','should','now', 'of', 'within']

def pre_processing(sentence_, stopwords_):
  words = word_tokenize(sentence_)
  ps = PorterStemmer()
  rootWord = list()
  for w in words:
      if w not in stopwords_:
        rootWord.append(ps.stem(w))
  return ' '.join( rootWord)


tokenized_doc = []
for ind in text_id_dict.keys():
  tokenized_doc.append(word_tokenize(pre_processing(text_id_dict[ind], stopwords)))

# Convert tokenized document into gensim formated tagged data
tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_doc)]
# Train the model
model = Doc2Vec(vector_size= 128, window = 5, min_count = 2, workers =4, epochs= 100)
model.build_vocab(tagged_data)
model.train(tagged_data, total_examples=model.corpus_count, epochs=model.epochs)


doc2vec_id_dict= dict()
for ind in tqdm(text_id_dict.keys()):
  doc2vec_id_dict[ind] =  list(model.infer_vector([text_id_dict[ind]]))

100%|██████████| 8338/8338 [00:08<00:00, 1011.87it/s]


In [288]:
# doc2vec model
def story_gen_just_doc2vec_model(seed_, collect_ids):
    ###SEARCH ALGORITHM    
    min_acceptable_similarity= 0.01
    min_keyword_overlapping= 0.60
    starting_ts = ts_finder(seed_, collect_ids)        
    seed_doc = seed_
    storyline = []
    storyline.append(seed_doc)
    for ts_ids in list(collect_ids.keys())[starting_ts+1:]:
        #preparing conditions and noises for diffusion model
        
        data_ids = [w[0] for w in data_timestamps[ts_ids]]
        seed_embedding = doc2vec_id_dict[seed_doc]
        temp = list()
        candidate_docs_id = collect_ids[ts_ids]
        candidate_scores = [abs(cosine_similarity([doc2vec_id_dict[w]],[seed_embedding])[0][0]) for w in candidate_docs_id]
        # print(candidate_scores)
        candidates = [(candidate_docs_id[i],candidate_scores[i]) for i in range(len(candidate_docs_id)) if candidate_scores[i] > min_acceptable_similarity]
        if len(candidates) < 1:
        #there is no document in the timestamp with the minimum acceptable keyword overlap
            continue
    
        candidate_docs_tf_idf = [keyword_extractor(tf_idf_dict[w[0]], list_of_words_in_tfidf, 100) for w in candidates]
        candidate_scores = [keyword_similarity(w,storyline_keywords) for w in candidate_docs_tf_idf]
        candidate_scores = [w for w in candidate_scores if w > min_keyword_overlapping]
        if len(candidate_scores)<1:
            continue
        
        final_score = [m*z[1] for m,z in zip(candidate_scores, candidates)]
        max_candidate_sim = final_score.index(max(final_score))
        seed_doc = candidates[max_candidate_sim][0]
        
        # add the new pick into the soryline
        storyline.append(seed_doc)
        # # update the storyline_keywords
        
    return storyline

In [289]:
#just doc2vec model
seen_doc = set()
result_dict_story_gen_just_doc2vec_model = dict()
for se in tqdm(seed_documents_for_experiments):
    if se in seen_doc:
        continue
    print(se)
    res = story_gen_just_doc2vec_model(se, collect_ids)
    result_dict_story_gen_just_doc2vec_model[se] = res
    for d in res:
        seen_doc.add(d)

  0%|          | 0/471 [00:00<?, ?it/s]

22989


  0%|          | 1/471 [00:53<6:57:42, 53.32s/it]

23079


  0%|          | 2/471 [01:46<6:56:54, 53.33s/it]

23002


  1%|          | 3/471 [02:40<6:56:02, 53.34s/it]

23124


  1%|          | 4/471 [03:33<6:55:13, 53.35s/it]

23131


  1%|          | 5/471 [04:26<6:53:59, 53.30s/it]

23119


  1%|▏         | 6/471 [05:19<6:53:01, 53.29s/it]

23192


  1%|▏         | 7/471 [06:13<6:52:20, 53.32s/it]

23066


  2%|▏         | 8/471 [07:06<6:51:16, 53.30s/it]

22988


  2%|▏         | 9/471 [07:59<6:49:57, 53.24s/it]

23159


  2%|▏         | 10/471 [08:53<6:49:27, 53.29s/it]

23107


  2%|▏         | 11/471 [09:46<6:50:03, 53.49s/it]

23022


  3%|▎         | 12/471 [10:40<6:49:03, 53.47s/it]

23045


  3%|▎         | 13/471 [11:33<6:48:07, 53.47s/it]

23110


  3%|▎         | 14/471 [12:27<6:46:55, 53.43s/it]

23084


  3%|▎         | 15/471 [13:20<6:46:17, 53.46s/it]

23017


  3%|▎         | 16/471 [14:13<6:44:54, 53.39s/it]

23036


  4%|▎         | 17/471 [15:07<6:43:59, 53.39s/it]

23023


  4%|▍         | 18/471 [16:00<6:43:12, 53.40s/it]

23005


  4%|▍         | 19/471 [16:54<6:42:17, 53.40s/it]

23123


  4%|▍         | 20/471 [17:47<6:41:22, 53.40s/it]

23061


  4%|▍         | 21/471 [18:40<6:40:14, 53.37s/it]

23121


  5%|▍         | 22/471 [19:34<6:40:30, 53.52s/it]

23073


  5%|▍         | 23/471 [20:28<6:39:11, 53.46s/it]

23020


  5%|▌         | 24/471 [21:21<6:38:26, 53.48s/it]

23163


  5%|▌         | 25/471 [22:15<6:37:58, 53.54s/it]

23177


  6%|▌         | 26/471 [23:08<6:36:56, 53.52s/it]

23077


  6%|▌         | 27/471 [24:02<6:36:08, 53.53s/it]

23130


  6%|▌         | 28/471 [24:55<6:34:38, 53.45s/it]

23188


  6%|▌         | 29/471 [25:49<6:34:04, 53.50s/it]

23054


  6%|▋         | 30/471 [26:42<6:33:21, 53.52s/it]

22990


  7%|▋         | 31/471 [27:36<6:32:57, 53.59s/it]

23189


  7%|▋         | 32/471 [28:30<6:32:31, 53.65s/it]

23115


  7%|▋         | 33/471 [29:23<6:31:43, 53.66s/it]

23040


  7%|▋         | 34/471 [30:17<6:31:34, 53.76s/it]

23013


  7%|▋         | 35/471 [31:11<6:31:04, 53.82s/it]

23185


  8%|▊         | 36/471 [32:05<6:29:31, 53.73s/it]

23126


  8%|▊         | 37/471 [32:58<6:28:04, 53.65s/it]

22997


  8%|▊         | 38/471 [33:52<6:26:28, 53.55s/it]

23060


  8%|▊         | 39/471 [34:45<6:25:39, 53.56s/it]

23041


  8%|▊         | 40/471 [35:39<6:24:59, 53.60s/it]

22986


  9%|▊         | 41/471 [36:32<6:23:31, 53.51s/it]

23004


  9%|▉         | 42/471 [37:26<6:22:10, 53.45s/it]

23137


  9%|▉         | 43/471 [38:19<6:20:54, 53.40s/it]

23057


  9%|▉         | 44/471 [39:12<6:20:12, 53.42s/it]

23136


 10%|▉         | 45/471 [40:06<6:20:42, 53.62s/it]

23068


 10%|▉         | 46/471 [41:00<6:19:24, 53.56s/it]

23064


 10%|▉         | 47/471 [41:53<6:18:25, 53.55s/it]

23146


 10%|█         | 48/471 [42:47<6:17:29, 53.55s/it]

23166


 10%|█         | 49/471 [43:40<6:16:19, 53.51s/it]

23055


 11%|█         | 50/471 [44:34<6:15:38, 53.54s/it]

23026


 11%|█         | 51/471 [45:28<6:15:06, 53.59s/it]

23018


 11%|█         | 52/471 [46:21<6:13:08, 53.43s/it]

22996


 11%|█▏        | 53/471 [47:14<6:12:02, 53.40s/it]

22998


 11%|█▏        | 54/471 [48:08<6:11:26, 53.44s/it]

23006


 12%|█▏        | 55/471 [49:01<6:10:52, 53.49s/it]

23167


 12%|█▏        | 56/471 [49:55<6:10:13, 53.53s/it]

22983


 12%|█▏        | 57/471 [50:49<6:10:06, 53.64s/it]

23080


 12%|█▏        | 58/471 [51:42<6:09:29, 53.68s/it]

23031


 13%|█▎        | 59/471 [52:36<6:08:34, 53.68s/it]

23035


 13%|█▎        | 60/471 [53:30<6:07:07, 53.60s/it]

22994


 13%|█▎        | 61/471 [54:23<6:05:37, 53.51s/it]

23147


 13%|█▎        | 62/471 [55:16<6:03:51, 53.38s/it]

23122


 13%|█▎        | 63/471 [56:09<6:02:36, 53.32s/it]

23104


 14%|█▎        | 64/471 [57:03<6:02:03, 53.37s/it]

23525


 14%|█▍        | 65/471 [57:55<5:59:32, 53.13s/it]

23565


 14%|█▍        | 66/471 [58:48<5:57:47, 53.01s/it]

23489


 14%|█▍        | 67/471 [59:41<5:56:35, 52.96s/it]

23312


 14%|█▍        | 68/471 [1:00:34<5:56:02, 53.01s/it]

23400


 15%|█▍        | 69/471 [1:01:26<5:54:22, 52.89s/it]

23344


 15%|█▍        | 70/471 [1:02:19<5:53:06, 52.83s/it]

23461


 15%|█▌        | 71/471 [1:03:12<5:51:59, 52.80s/it]

23331


 15%|█▌        | 72/471 [1:04:05<5:50:44, 52.74s/it]

23197


 15%|█▌        | 73/471 [1:04:57<5:49:50, 52.74s/it]

23298


 16%|█▌        | 74/471 [1:05:50<5:49:11, 52.78s/it]

23551


 16%|█▌        | 75/471 [1:06:43<5:48:34, 52.81s/it]

23510


 16%|█▌        | 76/471 [1:07:36<5:48:02, 52.87s/it]

23481


 16%|█▋        | 77/471 [1:08:29<5:46:51, 52.82s/it]

23548


 17%|█▋        | 78/471 [1:09:22<5:46:01, 52.83s/it]

23466


 17%|█▋        | 79/471 [1:10:14<5:44:56, 52.80s/it]

23474


 17%|█▋        | 80/471 [1:11:07<5:44:32, 52.87s/it]

23553


 17%|█▋        | 81/471 [1:12:00<5:43:22, 52.83s/it]

23431


 17%|█▋        | 82/471 [1:12:53<5:42:35, 52.84s/it]

23359


 18%|█▊        | 83/471 [1:13:46<5:41:26, 52.80s/it]

23206


 18%|█▊        | 84/471 [1:14:38<5:40:23, 52.77s/it]

23201


 18%|█▊        | 85/471 [1:15:31<5:39:19, 52.74s/it]

23395


 18%|█▊        | 86/471 [1:16:24<5:38:06, 52.69s/it]

23291


 18%|█▊        | 87/471 [1:17:16<5:37:22, 52.72s/it]

23569


 19%|█▊        | 88/471 [1:18:09<5:36:34, 52.73s/it]

23624


 19%|█▉        | 89/471 [1:19:02<5:35:32, 52.70s/it]

23511


 19%|█▉        | 90/471 [1:19:54<5:34:36, 52.70s/it]

23296


 19%|█▉        | 91/471 [1:20:48<5:34:41, 52.85s/it]

23397


 20%|█▉        | 92/471 [1:21:41<5:34:09, 52.90s/it]

23248


 20%|█▉        | 93/471 [1:22:34<5:33:50, 52.99s/it]

23570


 20%|█▉        | 94/471 [1:23:27<5:33:01, 53.00s/it]

23442


 20%|██        | 95/471 [1:24:20<5:31:43, 52.93s/it]

23460


 20%|██        | 96/471 [1:25:13<5:30:43, 52.92s/it]

23193


 21%|██        | 97/471 [1:26:05<5:29:22, 52.84s/it]

23608


 21%|██        | 98/471 [1:26:58<5:28:19, 52.81s/it]

23199


 21%|██        | 99/471 [1:27:51<5:27:28, 52.82s/it]

23550


 21%|██        | 100/471 [1:28:44<5:26:50, 52.86s/it]

23417


 21%|██▏       | 101/471 [1:29:37<5:25:47, 52.83s/it]

23266


 22%|██▏       | 102/471 [1:30:29<5:24:31, 52.77s/it]

23480


 22%|██▏       | 103/471 [1:31:22<5:24:05, 52.84s/it]

23576


 22%|██▏       | 104/471 [1:32:15<5:23:06, 52.82s/it]

23613


 22%|██▏       | 105/471 [1:33:08<5:22:04, 52.80s/it]

23581


 23%|██▎       | 106/471 [1:34:00<5:20:47, 52.73s/it]

23313


 23%|██▎       | 107/471 [1:34:53<5:19:46, 52.71s/it]

23618


 23%|██▎       | 108/471 [1:35:46<5:18:44, 52.69s/it]

23428


 23%|██▎       | 109/471 [1:36:38<5:17:32, 52.63s/it]

23383


 23%|██▎       | 110/471 [1:37:31<5:16:52, 52.67s/it]

23458


 24%|██▎       | 111/471 [1:38:23<5:15:56, 52.66s/it]

23401


 24%|██▍       | 112/471 [1:39:16<5:14:58, 52.64s/it]

23216


 24%|██▍       | 113/471 [1:40:09<5:14:03, 52.64s/it]

23468


 24%|██▍       | 114/471 [1:41:01<5:13:15, 52.65s/it]

23260


 24%|██▍       | 115/471 [1:41:54<5:13:17, 52.80s/it]

23443


 25%|██▍       | 116/471 [1:42:47<5:11:59, 52.73s/it]

23541


 25%|██▍       | 117/471 [1:43:39<5:10:27, 52.62s/it]

23282


 25%|██▌       | 118/471 [1:44:32<5:09:54, 52.68s/it]

23487


 25%|██▌       | 119/471 [1:45:25<5:09:09, 52.70s/it]

23247


 25%|██▌       | 120/471 [1:46:18<5:08:32, 52.74s/it]

23252


 26%|██▌       | 121/471 [1:47:11<5:07:45, 52.76s/it]

23445


 26%|██▌       | 122/471 [1:48:03<5:06:48, 52.75s/it]

23337


 26%|██▌       | 123/471 [1:48:56<5:06:27, 52.84s/it]

23210


 26%|██▋       | 124/471 [1:49:49<5:05:26, 52.81s/it]

23603


 27%|██▋       | 125/471 [1:50:42<5:04:24, 52.79s/it]

23749


 27%|██▋       | 126/471 [1:51:34<5:02:53, 52.68s/it]

23723


 27%|██▋       | 127/471 [1:52:26<5:00:59, 52.50s/it]

23873


 27%|██▋       | 128/471 [1:53:18<4:59:25, 52.38s/it]

23776


 27%|██▋       | 129/471 [1:54:11<4:58:16, 52.33s/it]

23874


 28%|██▊       | 130/471 [1:55:03<4:56:59, 52.26s/it]

23664


 28%|██▊       | 131/471 [1:55:55<4:55:53, 52.21s/it]

23899


 28%|██▊       | 132/471 [1:56:47<4:54:58, 52.21s/it]

23887


 28%|██▊       | 133/471 [1:57:39<4:53:45, 52.15s/it]

23837


 28%|██▊       | 134/471 [1:58:31<4:52:54, 52.15s/it]

23854


 29%|██▊       | 135/471 [1:59:23<4:52:10, 52.18s/it]

23756


 29%|██▉       | 136/471 [2:00:15<4:51:02, 52.13s/it]

23810


 29%|██▉       | 137/471 [2:01:08<4:50:00, 52.10s/it]

23688


 29%|██▉       | 138/471 [2:02:00<4:49:56, 52.24s/it]

23902


 30%|██▉       | 139/471 [2:02:52<4:49:08, 52.26s/it]

23845


 30%|██▉       | 140/471 [2:03:45<4:48:10, 52.24s/it]

23863


 30%|██▉       | 141/471 [2:04:37<4:48:01, 52.37s/it]

23721


 30%|███       | 142/471 [2:05:30<4:47:00, 52.34s/it]

23859


 30%|███       | 143/471 [2:06:22<4:46:00, 52.32s/it]

23841


 31%|███       | 144/471 [2:07:14<4:44:39, 52.23s/it]

23625


 31%|███       | 145/471 [2:08:06<4:43:51, 52.24s/it]

23833


 31%|███       | 146/471 [2:08:58<4:43:04, 52.26s/it]

23862


 31%|███       | 147/471 [2:09:51<4:42:22, 52.29s/it]

23634


 31%|███▏      | 148/471 [2:10:43<4:41:36, 52.31s/it]

23827


 32%|███▏      | 149/471 [2:11:36<4:40:58, 52.35s/it]

23631


 32%|███▏      | 150/471 [2:12:28<4:40:30, 52.43s/it]

23916


 32%|███▏      | 151/471 [2:13:21<4:39:30, 52.41s/it]

23895


 32%|███▏      | 152/471 [2:14:13<4:38:25, 52.37s/it]

23733


 32%|███▏      | 153/471 [2:15:05<4:37:18, 52.32s/it]

23628


 33%|███▎      | 154/471 [2:15:57<4:36:10, 52.27s/it]

23788


 33%|███▎      | 155/471 [2:16:49<4:34:58, 52.21s/it]

23901


 33%|███▎      | 156/471 [2:17:41<4:34:10, 52.22s/it]

23638


 33%|███▎      | 157/471 [2:18:34<4:33:13, 52.21s/it]

23826


 34%|███▎      | 158/471 [2:19:26<4:32:13, 52.18s/it]

23643


 34%|███▍      | 159/471 [2:20:18<4:31:28, 52.21s/it]

23705


 34%|███▍      | 160/471 [2:21:10<4:30:46, 52.24s/it]

23875


 34%|███▍      | 161/471 [2:22:03<4:30:23, 52.33s/it]

23831


 34%|███▍      | 162/471 [2:22:55<4:28:45, 52.19s/it]

23809


 35%|███▍      | 163/471 [2:23:47<4:27:47, 52.17s/it]

23871


 35%|███▍      | 164/471 [2:24:39<4:27:00, 52.18s/it]

23828


 35%|███▌      | 165/471 [2:25:31<4:26:12, 52.20s/it]

23794


 35%|███▌      | 166/471 [2:26:24<4:25:16, 52.19s/it]

23777


 35%|███▌      | 167/471 [2:27:16<4:24:26, 52.19s/it]

23735


 36%|███▌      | 168/471 [2:28:08<4:23:35, 52.20s/it]

23896


 36%|███▌      | 169/471 [2:29:00<4:22:53, 52.23s/it]

23714


 36%|███▌      | 170/471 [2:29:53<4:22:11, 52.27s/it]

23829


 36%|███▋      | 171/471 [2:30:45<4:21:13, 52.25s/it]

24025


 37%|███▋      | 173/471 [2:31:36<3:18:50, 40.03s/it]

24075


 37%|███▋      | 174/471 [2:32:28<3:32:54, 43.01s/it]

23941


 37%|███▋      | 175/471 [2:33:20<3:43:27, 45.29s/it]

24125


 37%|███▋      | 176/471 [2:34:12<3:51:11, 47.02s/it]

24061


 38%|███▊      | 177/471 [2:35:04<3:56:57, 48.36s/it]

24193


 38%|███▊      | 178/471 [2:35:56<4:01:15, 49.40s/it]

24026


 38%|███▊      | 179/471 [2:36:47<4:03:38, 50.06s/it]

24325


 38%|███▊      | 180/471 [2:37:39<4:05:02, 50.52s/it]

24303


 38%|███▊      | 181/471 [2:38:30<4:05:41, 50.83s/it]

24239


 39%|███▊      | 182/471 [2:39:22<4:06:01, 51.08s/it]

24148


 39%|███▉      | 183/471 [2:40:14<4:06:02, 51.26s/it]

24240


 39%|███▉      | 184/471 [2:41:06<4:05:59, 51.43s/it]

24013


 39%|███▉      | 185/471 [2:41:57<4:05:38, 51.53s/it]

24042


 39%|███▉      | 186/471 [2:42:49<4:05:31, 51.69s/it]

24329


 40%|███▉      | 187/471 [2:43:41<4:04:45, 51.71s/it]

24194


 40%|███▉      | 188/471 [2:44:33<4:03:45, 51.68s/it]

24268


 40%|████      | 189/471 [2:45:25<4:03:12, 51.75s/it]

24005


 40%|████      | 190/471 [2:46:16<4:02:11, 51.71s/it]

24132


 41%|████      | 191/471 [2:47:08<4:01:15, 51.70s/it]

24316


 41%|████      | 192/471 [2:48:00<4:00:15, 51.67s/it]

24153


 41%|████      | 193/471 [2:48:51<3:59:20, 51.66s/it]

24146


 41%|████      | 194/471 [2:49:43<3:58:33, 51.67s/it]

24338


 41%|████▏     | 195/471 [2:50:35<3:57:37, 51.66s/it]

24269


 42%|████▏     | 196/471 [2:51:26<3:56:50, 51.68s/it]

23990


 42%|████▏     | 197/471 [2:52:18<3:56:01, 51.68s/it]

23952


 42%|████▏     | 198/471 [2:53:10<3:55:49, 51.83s/it]

24180


 42%|████▏     | 199/471 [2:54:02<3:54:42, 51.77s/it]

24088


 42%|████▏     | 200/471 [2:54:53<3:53:26, 51.68s/it]

23954


 43%|████▎     | 201/471 [2:55:45<3:52:20, 51.63s/it]

24067


 43%|████▎     | 202/471 [2:56:37<3:51:38, 51.67s/it]

24190


 43%|████▎     | 203/471 [2:57:28<3:50:55, 51.70s/it]

24137


 43%|████▎     | 204/471 [2:58:20<3:50:04, 51.70s/it]

24271


 44%|████▎     | 205/471 [2:59:12<3:49:06, 51.68s/it]

24313


 44%|████▎     | 206/471 [3:00:03<3:48:19, 51.69s/it]

24158


 44%|████▍     | 207/471 [3:00:55<3:47:24, 51.68s/it]

23951


 44%|████▍     | 208/471 [3:01:47<3:46:28, 51.67s/it]

24219


 44%|████▍     | 209/471 [3:02:38<3:45:43, 51.69s/it]

23975


 45%|████▍     | 210/471 [3:03:30<3:45:04, 51.74s/it]

24169


 45%|████▍     | 211/471 [3:04:22<3:44:14, 51.75s/it]

24150


 45%|████▌     | 212/471 [3:05:14<3:43:21, 51.74s/it]

23969


 45%|████▌     | 213/471 [3:06:05<3:42:25, 51.73s/it]

24000


 45%|████▌     | 214/471 [3:06:57<3:41:38, 51.74s/it]

24633


 46%|████▌     | 215/471 [3:07:49<3:40:08, 51.59s/it]

24412


 46%|████▌     | 216/471 [3:08:40<3:38:45, 51.47s/it]

24610


 46%|████▌     | 217/471 [3:09:31<3:37:33, 51.39s/it]

24460


 46%|████▋     | 218/471 [3:10:22<3:36:07, 51.25s/it]

24608


 46%|████▋     | 219/471 [3:11:13<3:35:01, 51.19s/it]

24734


 47%|████▋     | 220/471 [3:12:04<3:34:06, 51.18s/it]

24729


 47%|████▋     | 221/471 [3:12:55<3:33:13, 51.18s/it]

24376


 47%|████▋     | 222/471 [3:13:47<3:32:48, 51.28s/it]

24446


 47%|████▋     | 223/471 [3:14:38<3:31:52, 51.26s/it]

24747


 48%|████▊     | 224/471 [3:15:29<3:30:59, 51.25s/it]

24352


 48%|████▊     | 225/471 [3:16:20<3:30:04, 51.24s/it]

24687


 48%|████▊     | 226/471 [3:17:12<3:29:09, 51.22s/it]

24396


 48%|████▊     | 227/471 [3:18:03<3:28:08, 51.18s/it]

24374


 48%|████▊     | 228/471 [3:18:54<3:27:24, 51.21s/it]

24361


 49%|████▊     | 229/471 [3:19:45<3:26:33, 51.21s/it]

24450


 49%|████▉     | 230/471 [3:20:36<3:25:49, 51.24s/it]

24419


 49%|████▉     | 231/471 [3:21:28<3:25:01, 51.26s/it]

24468


 49%|████▉     | 232/471 [3:22:19<3:23:57, 51.20s/it]

24696


 49%|████▉     | 233/471 [3:23:10<3:23:03, 51.19s/it]

24415


 50%|████▉     | 234/471 [3:24:02<3:22:39, 51.31s/it]

24509


 50%|████▉     | 235/471 [3:24:53<3:21:31, 51.24s/it]

24728


 50%|█████     | 236/471 [3:25:44<3:20:22, 51.16s/it]

24702


 50%|█████     | 237/471 [3:26:35<3:19:19, 51.11s/it]

24500


 51%|█████     | 238/471 [3:27:26<3:18:37, 51.15s/it]

24344


 51%|█████     | 239/471 [3:28:17<3:17:47, 51.15s/it]

24479


 51%|█████     | 240/471 [3:29:08<3:16:52, 51.14s/it]

24614


 51%|█████     | 241/471 [3:29:59<3:15:54, 51.11s/it]

24612


 51%|█████▏    | 242/471 [3:30:50<3:14:49, 51.05s/it]

24497


 52%|█████▏    | 243/471 [3:31:41<3:13:56, 51.04s/it]

24670


 52%|█████▏    | 244/471 [3:32:32<3:13:02, 51.02s/it]

24622


 52%|█████▏    | 245/471 [3:33:23<3:12:25, 51.09s/it]

24750


 52%|█████▏    | 246/471 [3:34:15<3:12:05, 51.23s/it]

24488


 52%|█████▏    | 247/471 [3:35:06<3:11:18, 51.24s/it]

24755


 53%|█████▎    | 248/471 [3:35:57<3:10:17, 51.20s/it]

24689


 53%|█████▎    | 249/471 [3:36:48<3:08:56, 51.07s/it]

24621


 53%|█████▎    | 250/471 [3:37:39<3:08:09, 51.08s/it]

24738


 53%|█████▎    | 251/471 [3:38:30<3:07:26, 51.12s/it]

24508


 54%|█████▎    | 252/471 [3:39:21<3:06:31, 51.10s/it]

24552


 54%|█████▎    | 253/471 [3:40:13<3:05:43, 51.12s/it]

24705


 54%|█████▍    | 254/471 [3:41:04<3:04:54, 51.13s/it]

24722


 54%|█████▍    | 255/471 [3:41:55<3:04:01, 51.12s/it]

24462


 54%|█████▍    | 256/471 [3:42:46<3:03:03, 51.08s/it]

24898


 55%|█████▍    | 258/471 [3:43:37<2:19:14, 39.22s/it]

24770


 55%|█████▍    | 259/471 [3:44:28<2:28:53, 42.14s/it]

24884


 55%|█████▌    | 260/471 [3:45:18<2:36:08, 44.40s/it]

24811


 55%|█████▌    | 261/471 [3:46:09<2:41:21, 46.10s/it]

24763


 56%|█████▌    | 262/471 [3:47:00<2:45:06, 47.40s/it]

24870


 56%|█████▌    | 263/471 [3:47:51<2:47:50, 48.42s/it]

24918


 56%|█████▌    | 264/471 [3:48:41<2:49:13, 49.05s/it]

24979


 56%|█████▋    | 265/471 [3:49:32<2:50:21, 49.62s/it]

24848


 56%|█████▋    | 266/471 [3:50:23<2:50:46, 49.98s/it]

24880


 57%|█████▋    | 267/471 [3:51:14<2:50:54, 50.26s/it]

24791


 57%|█████▋    | 268/471 [3:52:05<2:50:41, 50.45s/it]

24841


 57%|█████▋    | 269/471 [3:52:56<2:50:22, 50.60s/it]

24790


 57%|█████▋    | 270/471 [3:53:47<2:49:39, 50.65s/it]

24864


 58%|█████▊    | 271/471 [3:54:38<2:49:11, 50.76s/it]

24804


 58%|█████▊    | 272/471 [3:55:28<2:48:15, 50.73s/it]

24789


 58%|█████▊    | 273/471 [3:56:19<2:47:19, 50.70s/it]

24885


 58%|█████▊    | 274/471 [3:57:10<2:46:28, 50.71s/it]

24891


 58%|█████▊    | 275/471 [3:58:00<2:45:33, 50.68s/it]

24917


 59%|█████▊    | 276/471 [3:58:51<2:44:42, 50.68s/it]

24915


 59%|█████▉    | 277/471 [3:59:42<2:43:51, 50.68s/it]

24932


 59%|█████▉    | 278/471 [4:00:32<2:42:57, 50.66s/it]

25036


 59%|█████▉    | 279/471 [4:01:23<2:41:58, 50.62s/it]

24905


 59%|█████▉    | 280/471 [4:02:13<2:41:12, 50.64s/it]

25022


 60%|█████▉    | 281/471 [4:03:04<2:40:34, 50.71s/it]

24824


 60%|█████▉    | 282/471 [4:03:55<2:39:42, 50.70s/it]

24867


 60%|██████    | 283/471 [4:04:46<2:39:18, 50.84s/it]

24893


 60%|██████    | 284/471 [4:05:37<2:38:25, 50.83s/it]

25315


 61%|██████    | 285/471 [4:06:27<2:36:47, 50.58s/it]

25552


 61%|██████    | 286/471 [4:07:17<2:35:32, 50.45s/it]

25299


 61%|██████    | 287/471 [4:08:07<2:34:24, 50.35s/it]

25197


 61%|██████    | 288/471 [4:08:57<2:33:08, 50.21s/it]

25526


 61%|██████▏   | 289/471 [4:09:47<2:32:07, 50.15s/it]

25180


 62%|██████▏   | 290/471 [4:10:37<2:31:06, 50.09s/it]

25079


 62%|██████▏   | 291/471 [4:11:27<2:30:17, 50.09s/it]

25219


 62%|██████▏   | 292/471 [4:12:17<2:29:33, 50.13s/it]

25135


 62%|██████▏   | 293/471 [4:13:07<2:28:38, 50.10s/it]

25184


 62%|██████▏   | 294/471 [4:13:58<2:28:09, 50.22s/it]

25231


 63%|██████▎   | 295/471 [4:14:48<2:27:34, 50.31s/it]

25124


 63%|██████▎   | 296/471 [4:15:38<2:26:22, 50.19s/it]

25435


 63%|██████▎   | 297/471 [4:16:29<2:25:28, 50.16s/it]

25572


 63%|██████▎   | 298/471 [4:17:19<2:24:33, 50.14s/it]

25300


 63%|██████▎   | 299/471 [4:18:09<2:23:39, 50.11s/it]

25122


 64%|██████▎   | 300/471 [4:18:59<2:22:44, 50.08s/it]

25176


 64%|██████▍   | 301/471 [4:19:49<2:21:50, 50.06s/it]

25322


 64%|██████▍   | 302/471 [4:20:39<2:20:57, 50.04s/it]

25250


 64%|██████▍   | 303/471 [4:21:29<2:20:05, 50.03s/it]

25133


 65%|██████▍   | 304/471 [4:22:19<2:19:10, 50.01s/it]

25285


 65%|██████▍   | 305/471 [4:23:09<2:18:25, 50.04s/it]

25177


 65%|██████▍   | 306/471 [4:23:59<2:17:36, 50.04s/it]

25408


 65%|██████▌   | 307/471 [4:24:49<2:16:59, 50.12s/it]

25372


 65%|██████▌   | 308/471 [4:25:39<2:16:03, 50.08s/it]

25194


 66%|██████▌   | 309/471 [4:26:29<2:15:17, 50.11s/it]

25141


 66%|██████▌   | 310/471 [4:27:19<2:14:25, 50.10s/it]

25207


 66%|██████▌   | 311/471 [4:28:10<2:13:41, 50.14s/it]

25520


 66%|██████▌   | 312/471 [4:29:00<2:12:49, 50.12s/it]

25144


 66%|██████▋   | 313/471 [4:29:50<2:12:06, 50.17s/it]

25414


 67%|██████▋   | 314/471 [4:30:40<2:11:17, 50.18s/it]

25573


 67%|██████▋   | 315/471 [4:31:30<2:10:19, 50.13s/it]

25560


 67%|██████▋   | 316/471 [4:32:20<2:09:31, 50.14s/it]

25508


 67%|██████▋   | 317/471 [4:33:10<2:08:38, 50.12s/it]

25279


 68%|██████▊   | 318/471 [4:34:01<2:07:55, 50.17s/it]

25200


 68%|██████▊   | 319/471 [4:34:51<2:06:57, 50.12s/it]

25121


 68%|██████▊   | 320/471 [4:35:41<2:06:22, 50.22s/it]

25325


 68%|██████▊   | 321/471 [4:36:31<2:05:25, 50.17s/it]

25333


 68%|██████▊   | 322/471 [4:37:22<2:04:47, 50.25s/it]

25162


 69%|██████▊   | 323/471 [4:38:12<2:04:03, 50.30s/it]

25264


 69%|██████▉   | 324/471 [4:39:02<2:03:14, 50.30s/it]

25331


 69%|██████▉   | 325/471 [4:39:52<2:02:10, 50.21s/it]

25349


 69%|██████▉   | 326/471 [4:40:42<2:01:10, 50.14s/it]

25418


 69%|██████▉   | 327/471 [4:41:32<2:00:23, 50.16s/it]

25130


 70%|██████▉   | 328/471 [4:42:23<1:59:38, 50.20s/it]

25701


 70%|██████▉   | 329/471 [4:43:12<1:58:21, 50.01s/it]

25856


 70%|███████   | 330/471 [4:44:02<1:57:20, 49.93s/it]

25634


 70%|███████   | 331/471 [4:44:52<1:56:20, 49.86s/it]

25619


 70%|███████   | 332/471 [4:45:42<1:55:39, 49.92s/it]

25710


 71%|███████   | 333/471 [4:46:31<1:54:32, 49.80s/it]

25632


 71%|███████   | 334/471 [4:47:21<1:53:26, 49.68s/it]

25631


 71%|███████   | 335/471 [4:48:10<1:52:29, 49.63s/it]

25847


 71%|███████▏  | 336/471 [4:49:00<1:51:33, 49.59s/it]

25610


 72%|███████▏  | 337/471 [4:49:49<1:50:39, 49.55s/it]

25605


 72%|███████▏  | 338/471 [4:50:39<1:49:43, 49.50s/it]

25855


 72%|███████▏  | 339/471 [4:51:28<1:49:02, 49.56s/it]

25614


 72%|███████▏  | 340/471 [4:52:18<1:48:10, 49.55s/it]

25809


 72%|███████▏  | 341/471 [4:53:07<1:47:21, 49.55s/it]

25687


 73%|███████▎  | 342/471 [4:53:57<1:46:24, 49.49s/it]

25766


 73%|███████▎  | 343/471 [4:54:46<1:45:32, 49.47s/it]

25674


 73%|███████▎  | 344/471 [4:55:36<1:45:03, 49.63s/it]

25802


 73%|███████▎  | 345/471 [4:56:26<1:44:06, 49.57s/it]

25671


 73%|███████▎  | 346/471 [4:57:15<1:43:14, 49.55s/it]

25590


 74%|███████▎  | 347/471 [4:58:05<1:42:22, 49.53s/it]

25705


 74%|███████▍  | 348/471 [4:58:54<1:41:39, 49.59s/it]

25824


 74%|███████▍  | 349/471 [4:59:44<1:40:59, 49.67s/it]

25787


 74%|███████▍  | 350/471 [5:00:34<1:40:15, 49.72s/it]

25833


 75%|███████▍  | 351/471 [5:01:24<1:39:18, 49.65s/it]

25857


 75%|███████▍  | 352/471 [5:02:13<1:38:33, 49.69s/it]

25630


 75%|███████▍  | 353/471 [5:03:03<1:37:39, 49.66s/it]

25616


 75%|███████▌  | 354/471 [5:03:53<1:36:50, 49.66s/it]

25773


 75%|███████▌  | 355/471 [5:04:42<1:35:55, 49.62s/it]

25613


 76%|███████▌  | 356/471 [5:05:32<1:35:03, 49.59s/it]

25654


 76%|███████▌  | 357/471 [5:06:21<1:34:18, 49.64s/it]

25589


 76%|███████▌  | 358/471 [5:07:11<1:33:27, 49.63s/it]

26132


 76%|███████▌  | 359/471 [5:08:00<1:32:23, 49.50s/it]

25931


 76%|███████▋  | 360/471 [5:08:49<1:31:23, 49.40s/it]

26159


 77%|███████▋  | 361/471 [5:09:38<1:30:15, 49.23s/it]

26331


 77%|███████▋  | 362/471 [5:10:27<1:29:17, 49.15s/it]

26136


 77%|███████▋  | 363/471 [5:11:16<1:28:29, 49.16s/it]

26160


 77%|███████▋  | 364/471 [5:12:05<1:27:39, 49.15s/it]

26038


 77%|███████▋  | 365/471 [5:12:55<1:26:54, 49.19s/it]

26259


 78%|███████▊  | 366/471 [5:13:44<1:26:14, 49.28s/it]

26067


 78%|███████▊  | 367/471 [5:14:34<1:25:26, 49.29s/it]

25885


 78%|███████▊  | 368/471 [5:15:22<1:24:24, 49.17s/it]

26074


 78%|███████▊  | 369/471 [5:16:12<1:23:39, 49.21s/it]

26284


 79%|███████▊  | 370/471 [5:17:00<1:22:36, 49.08s/it]

26291


 79%|███████▉  | 371/471 [5:17:50<1:21:47, 49.08s/it]

26002


 79%|███████▉  | 372/471 [5:18:39<1:20:56, 49.06s/it]

25940


 79%|███████▉  | 373/471 [5:19:28<1:20:05, 49.03s/it]

25957


 79%|███████▉  | 374/471 [5:20:16<1:19:12, 49.00s/it]

26012


 80%|███████▉  | 375/471 [5:21:05<1:18:18, 48.94s/it]

26178


 80%|███████▉  | 376/471 [5:21:54<1:17:29, 48.94s/it]

26194


 80%|████████  | 377/471 [5:22:43<1:16:40, 48.94s/it]

26163


 80%|████████  | 378/471 [5:23:32<1:16:00, 49.04s/it]

26240


 80%|████████  | 379/471 [5:24:21<1:15:10, 49.03s/it]

26317


 81%|████████  | 380/471 [5:25:10<1:14:20, 49.02s/it]

26174


 81%|████████  | 381/471 [5:26:00<1:13:36, 49.07s/it]

26095


 81%|████████  | 382/471 [5:26:49<1:12:54, 49.15s/it]

26021


 81%|████████▏ | 383/471 [5:27:38<1:11:59, 49.09s/it]

26024


 82%|████████▏ | 384/471 [5:28:27<1:11:07, 49.05s/it]

25860


 82%|████████▏ | 385/471 [5:29:16<1:10:17, 49.04s/it]

26129


 82%|████████▏ | 386/471 [5:30:05<1:09:27, 49.03s/it]

25991


 82%|████████▏ | 387/471 [5:30:54<1:08:33, 48.97s/it]

26084


 82%|████████▏ | 388/471 [5:31:43<1:07:43, 48.95s/it]

26027


 83%|████████▎ | 389/471 [5:32:31<1:06:48, 48.88s/it]

26489


 83%|████████▎ | 391/471 [5:33:20<50:00, 37.51s/it]  

26589


 83%|████████▎ | 392/471 [5:34:08<53:01, 40.27s/it]

26545


 83%|████████▎ | 393/471 [5:34:57<55:04, 42.37s/it]

26394


 84%|████████▎ | 394/471 [5:35:45<56:35, 44.10s/it]

26526


 84%|████████▍ | 395/471 [5:36:34<57:33, 45.44s/it]

26409


 84%|████████▍ | 396/471 [5:37:23<57:51, 46.29s/it]

26437


 84%|████████▍ | 397/471 [5:38:11<57:54, 46.95s/it]

26411


 85%|████████▍ | 398/471 [5:39:00<57:45, 47.47s/it]

26560


 85%|████████▍ | 399/471 [5:39:49<57:25, 47.85s/it]

26601


 85%|████████▍ | 400/471 [5:40:37<56:52, 48.06s/it]

26593


 85%|████████▌ | 401/471 [5:41:26<56:20, 48.29s/it]

26532


 85%|████████▌ | 402/471 [5:42:15<55:38, 48.39s/it]

26405


 86%|████████▌ | 403/471 [5:43:03<54:55, 48.46s/it]

26585


 86%|████████▌ | 404/471 [5:43:52<54:04, 48.43s/it]

26491


 86%|████████▌ | 405/471 [5:44:40<53:17, 48.45s/it]

26514


 86%|████████▌ | 406/471 [5:45:29<52:31, 48.48s/it]

26749


 86%|████████▋ | 407/471 [5:46:17<51:38, 48.41s/it]

26662


 87%|████████▋ | 408/471 [5:47:06<50:52, 48.45s/it]

11427


 87%|████████▋ | 409/471 [5:47:54<50:00, 48.39s/it]

26685


 87%|████████▋ | 410/471 [5:48:42<49:07, 48.32s/it]

11436


 87%|████████▋ | 411/471 [5:49:30<48:13, 48.22s/it]

11387


 87%|████████▋ | 412/471 [5:50:18<47:19, 48.13s/it]

11290


 88%|████████▊ | 413/471 [5:51:06<46:28, 48.07s/it]

11314


 88%|████████▊ | 414/471 [5:51:54<45:40, 48.08s/it]

11206


 88%|████████▊ | 415/471 [5:52:42<44:50, 48.04s/it]

26738


 88%|████████▊ | 416/471 [5:53:30<44:02, 48.05s/it]

11378


 89%|████████▊ | 417/471 [5:54:18<43:19, 48.13s/it]

11219


 89%|████████▊ | 418/471 [5:55:07<42:34, 48.20s/it]

11205


 89%|████████▉ | 419/471 [5:55:55<41:46, 48.20s/it]

11216


 89%|████████▉ | 420/471 [5:56:43<40:58, 48.20s/it]

11224


 89%|████████▉ | 421/471 [5:57:32<40:15, 48.30s/it]

26649


 90%|████████▉ | 422/471 [5:58:20<39:23, 48.23s/it]

11325


 90%|████████▉ | 423/471 [5:59:08<38:33, 48.19s/it]

11283


 90%|█████████ | 424/471 [5:59:56<37:42, 48.15s/it]

26729


 90%|█████████ | 425/471 [6:00:44<36:55, 48.16s/it]

11416


 90%|█████████ | 426/471 [6:01:32<36:07, 48.16s/it]

11277


 91%|█████████ | 427/471 [6:02:20<35:16, 48.09s/it]

11279


 91%|█████████ | 428/471 [6:03:08<34:26, 48.05s/it]

11388


 91%|█████████ | 429/471 [6:03:56<33:38, 48.05s/it]

11521


 92%|█████████▏| 431/471 [6:04:44<24:32, 36.81s/it]

11570


 92%|█████████▏| 432/471 [6:05:31<25:38, 39.44s/it]

11797


 92%|█████████▏| 433/471 [6:06:18<26:18, 41.53s/it]

11854


 92%|█████████▏| 434/471 [6:07:06<26:39, 43.24s/it]

11857


 92%|█████████▏| 435/471 [6:07:54<26:39, 44.43s/it]

11728


 93%|█████████▎| 436/471 [6:08:41<26:25, 45.31s/it]

11867


 93%|█████████▎| 437/471 [6:09:29<26:03, 45.99s/it]

11835


 93%|█████████▎| 438/471 [6:10:16<25:32, 46.45s/it]

11837


 93%|█████████▎| 439/471 [6:11:04<24:56, 46.76s/it]

11625


 93%|█████████▎| 440/471 [6:11:51<24:16, 46.97s/it]

11891


 94%|█████████▎| 441/471 [6:12:39<23:34, 47.13s/it]

11742


 94%|█████████▍| 442/471 [6:13:26<22:48, 47.20s/it]

11890


 94%|█████████▍| 443/471 [6:14:14<22:02, 47.24s/it]

11704


 94%|█████████▍| 444/471 [6:15:01<21:16, 47.29s/it]

11712


 94%|█████████▍| 445/471 [6:15:49<20:32, 47.41s/it]

11882


 95%|█████████▍| 446/471 [6:16:37<19:50, 47.64s/it]

11608


 95%|█████████▍| 447/471 [6:17:25<19:05, 47.73s/it]

11525


 95%|█████████▌| 448/471 [6:18:13<18:18, 47.77s/it]

11720


 95%|█████████▌| 449/471 [6:19:00<17:30, 47.75s/it]

11851


 96%|█████████▌| 450/471 [6:19:48<16:42, 47.72s/it]

11451


 96%|█████████▌| 451/471 [6:20:36<15:55, 47.77s/it]

12110


 96%|█████████▌| 452/471 [6:21:23<15:03, 47.55s/it]

12106


 96%|█████████▌| 453/471 [6:22:10<14:12, 47.34s/it]

12239


 96%|█████████▋| 454/471 [6:22:57<13:22, 47.21s/it]

11910


 97%|█████████▋| 455/471 [6:23:44<12:35, 47.21s/it]

12056


 97%|█████████▋| 456/471 [6:24:31<11:47, 47.17s/it]

12191


 97%|█████████▋| 457/471 [6:25:18<10:59, 47.10s/it]

12082


 97%|█████████▋| 458/471 [6:26:05<10:12, 47.08s/it]

12075


 97%|█████████▋| 459/471 [6:26:52<09:24, 47.04s/it]

12256


 98%|█████████▊| 460/471 [6:27:40<08:39, 47.25s/it]

12226


 98%|█████████▊| 461/471 [6:28:27<07:51, 47.20s/it]

12228


 98%|█████████▊| 462/471 [6:29:14<07:03, 47.08s/it]

12078


 98%|█████████▊| 463/471 [6:30:01<06:16, 47.05s/it]

11985


 99%|█████████▊| 464/471 [6:30:48<05:29, 47.03s/it]

12192


 99%|█████████▊| 465/471 [6:31:35<04:42, 47.05s/it]

12087


 99%|█████████▉| 466/471 [6:32:21<03:54, 46.97s/it]

12042


 99%|█████████▉| 467/471 [6:33:09<03:08, 47.14s/it]

12253


 99%|█████████▉| 468/471 [6:33:56<02:21, 47.10s/it]

12029


100%|█████████▉| 469/471 [6:34:43<01:34, 47.06s/it]

12074


100%|█████████▉| 470/471 [6:35:30<00:47, 47.05s/it]

12281


100%|██████████| 471/471 [6:36:17<00:00, 50.48s/it]


In [290]:
pickle.dump(result_dict_story_gen_just_doc2vec_model, open('25_result_dict_story_gen_just_doc2vec_model_v128.p', 'wb'))