Install and import library modules. Download English stopwords and WordNet. Set logging to INFO

In [1]:
import sys
!{sys.executable} -m pip install gensim
!{sys.executable} -m pip install pyLDAvis
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install nltk

import re, gensim, pyLDAvis, logging, datetime, multiprocessing, os, pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import floor, ceil
from nltk.corpus import stopwords
from nltk import download
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources this notebook imports from: the English stop-word
# list (nltk.corpus.stopwords) and WordNet (used by WordNetLemmatizer).
for nltk_resource in ('stopwords', 'wordnet'):
    download(nltk_resource)

# Emit INFO-level log records with a timestamp and the level name.
log_format = '%(asctime)s : %(levelname)s : %(message)s'
logging.basicConfig(format=log_format, level=logging.INFO)



[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/joostgadellaa/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/joostgadellaa/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Read the data as pandas dataframe, remove items with missing abstracts, and fill other missing data with empty strings

In [2]:
# Load the merged dataset, keep only the rows that have an abstract, and turn
# every remaining missing value into an empty string.
df = (
    pd.read_csv('data/data_merged_and_filtered.csv')
    .dropna(subset=['abstract'])
    .fillna("")
)

Combine the text fields into one

In [3]:
# Join abstract, keywords and title (in that order) into one text column,
# separating consecutive fields with a single space.
text_columns = ["abstract", "keywords", "title"]
combined = df[text_columns[0]]
for column in text_columns[1:]:
    combined = combined + " " + df[column]
df["data"] = combined

Remove copyright notices from the text

In [4]:
# A copyright notice starts at "©", its HTML entity "&#x00A9;", or the word
# "Copyright", and lazily extends up to and including the first "B.V.", "Ltd"
# or "." that follows.
copyright_re = "(©|&#x00A9;|Copyright)(.*?(?=B\\.V\\.|Ltd|\\.))(B\\.V\\.|Ltd|\\.)"
rights = "All rights reserved."

# regex=True makes DataFrame.replace substitute matching substrings inside
# each string cell, so the notice is cut out of the surrounding text.
df.replace(copyright_re, '', regex=True, inplace=True)

# With regex=False, DataFrame.replace only swaps cells whose ENTIRE value
# equals the target, so the phrase embedded in a longer abstract was never
# removed. Escaping the phrase and matching it as a regex strips every
# embedded occurrence while still treating it as literal text.
df.replace(re.escape(rights), '', regex=True, inplace=True)

Tokenize, convert to lowercase, remove punctuation, remove stop-words, lemmatize

In [5]:
def preprocess(text):
  """Turn one raw document string into a list of cleaned, lemmatized tokens.

  The text is tokenized with gensim's ``simple_preprocess`` (``deacc=True``),
  tokens found in the module-level ``stop_words`` are dropped, and each
  remaining token is passed through NLTK's ``WordNetLemmatizer``.

  Args:
    text: The document text to process.

  Returns:
    A list of lemmatized token strings, in their original order.
  """
  # Build a set once per document so each token check is a constant-time
  # lookup instead of a linear scan of the stop-word list. Reading
  # ``stop_words`` at call time keeps later edits to that list effective.
  excluded = frozenset(stop_words)
  wordnet_lemmatizer = WordNetLemmatizer()
  return [
    wordnet_lemmatizer.lemmatize(word)
    for word in gensim.utils.simple_preprocess(text, deacc=True)
    if word not in excluded
  ]

# English stop-word list from NLTK; kept as a list so other cells can still
# treat it as one.
stop_words = stopwords.words('english')
df["data_preprocessed"] = df["data"].map(preprocess)

Generate the dictionary and perform relative pruning

In [6]:
# Map every token seen in the preprocessed documents to an integer id.
dictionary = gensim.corpora.Dictionary(df["data_preprocessed"])

# Relative pruning: the lower bound is 0.5% of the number of documents
# (rounded down) and the upper bound is 99% of the documents.
min_document_count = floor(0.005 * len(df))
max_document_fraction = 0.99
dictionary.filter_extremes(no_below=min_document_count, no_above=max_document_fraction)

2021-07-19 11:44:21,344 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2021-07-19 11:44:23,236 : INFO : adding document #10000 to Dictionary(27074 unique tokens: ['ability', 'acceptable', 'additionally', 'analysis', 'anti']...)
2021-07-19 11:44:25,119 : INFO : adding document #20000 to Dictionary(38089 unique tokens: ['ability', 'acceptable', 'additionally', 'analysis', 'anti']...)
2021-07-19 11:44:27,025 : INFO : adding document #30000 to Dictionary(46717 unique tokens: ['ability', 'acceptable', 'additionally', 'analysis', 'anti']...)
2021-07-19 11:44:28,950 : INFO : adding document #40000 to Dictionary(53696 unique tokens: ['ability', 'acceptable', 'additionally', 'analysis', 'anti']...)
2021-07-19 11:44:30,737 : INFO : adding document #50000 to Dictionary(59648 unique tokens: ['ability', 'acceptable', 'additionally', 'analysis', 'anti']...)
2021-07-19 11:44:32,409 : INFO : adding document #60000 to Dictionary(64997 unique tokens: ['ability', 'acceptable', 'additional

Make the bag of words

In [7]:
# Convert each tokenized document into its bag-of-words representation.
corpus = list(map(dictionary.doc2bow, df["data_preprocessed"]))

Generate all models for evaluation

In [8]:
# Grid of models to train: one LDA model per (random_state, num_topics) pair.
k = [25, 30, 36, 40, 42, 49] #[1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144, 169, 196, 225, 256, 289, 324, 361] later added [30, 40, 42]
random_states = [24, 27, 1024, 1027] #[24, 1024] later added [27, 1027]

texts = df["data_preprocessed"]
# The chunk size equals the number of documents, so the whole corpus is
# handed to the model as a single chunk.
chunksize = len(texts)
passes = 20
iterations = 100
# NOTE(review): eval_every is defined but never passed to LdaModel below, so
# gensim's default applies. Wiring it in may change the models produced for a
# given seed -- confirm before changing, since models are cached on disk.
eval_every = None
dir_prior = 'auto'

# Make sure the output directory exists so a fresh checkout does not fail
# when checking for (or saving) models.
os.makedirs('models', exist_ok=True)

for random_state in random_states:
    for num_topics in k:

        name = "rs{}k{}".format(random_state, num_topics)
        model_path = os.path.join('models', 'model_' + name)

        # Training is expensive: reuse any model already saved under this name.
        if os.path.exists(model_path):
            print(datetime.datetime.now().strftime('%H:%M:%S') + ' Model ' + name + ' already in directory. Skipped')
            continue

        print(datetime.datetime.now().strftime('%H:%M:%S') + " Building model " + name)
        lda_model = gensim.models.LdaModel(
            corpus,
            num_topics=num_topics,
            id2word=dictionary,
            alpha=dir_prior,
            eta=dir_prior,
            random_state=random_state,
            chunksize=chunksize,
            passes=passes,
            iterations=iterations,
        )

        lda_model.save(model_path)

11:44:45 Model rs24k25 already in directory. Skipped
11:44:45 Model rs24k30 already in directory. Skipped
11:44:45 Model rs24k36 already in directory. Skipped
11:44:45 Model rs24k40 already in directory. Skipped
11:44:45 Model rs24k42 already in directory. Skipped
11:44:45 Model rs24k49 already in directory. Skipped
11:44:45 Model rs27k25 already in directory. Skipped
11:44:45 Model rs27k30 already in directory. Skipped
11:44:45 Model rs27k36 already in directory. Skipped
11:44:45 Model rs27k40 already in directory. Skipped
11:44:45 Model rs27k42 already in directory. Skipped
11:44:45 Model rs27k49 already in directory. Skipped
11:44:45 Model rs1024k25 already in directory. Skipped
11:44:45 Model rs1024k30 already in directory. Skipped
11:44:45 Model rs1024k36 already in directory. Skipped
11:44:45 Model rs1024k40 already in directory. Skipped
11:44:45 Model rs1024k42 already in directory. Skipped
11:44:45 Model rs1024k49 already in directory. Skipped
11:44:45 Model rs1027k25 already i

Evaluate all models present in the `models` directory

In [None]:
# Find the saved models: every '*.state' file in 'models' marks one model,
# and stripping that suffix yields the path handed to LdaModel.load below.
state_suffix = '.state'
models_to_evaluate = sorted(
    entry.path[:-len(state_suffix)]
    for entry in os.scandir('models')
    if entry.path.endswith(state_suffix)
)

scores_path = os.path.join('models', 'scores.txt')

for path in models_to_evaluate:
    timestamp = datetime.datetime.now().strftime('%H:%M:%S')
    print(timestamp + ' Starting evaluation of ' + path)

    # Score the loaded model with c_v coherence against the preprocessed texts.
    model = gensim.models.LdaModel.load(path)
    coherence_model_lda = gensim.models.coherencemodel.CoherenceModel(
        model=model,
        texts=texts,
        dictionary=dictionary,
        coherence='c_v',
    )
    coherence = coherence_model_lda.get_coherence()

    # Append one '"<model name>" : <score>,' line per evaluated model.
    model_name = os.path.split(path)[1]
    with open(scores_path, "a") as f:
        f.write("\"" + str(model_name) + "\"" + ' : ' + str(coherence) + ',\n')

2021-07-19 11:44:45,269 : INFO : loading LdaModel object from models/model_rs1024k1
2021-07-19 11:44:45,276 : INFO : loading expElogbeta from models/model_rs1024k1.expElogbeta.npy with mmap=None
2021-07-19 11:44:45,279 : INFO : setting ignored attribute id2word to None
2021-07-19 11:44:45,281 : INFO : setting ignored attribute state to None
2021-07-19 11:44:45,288 : INFO : setting ignored attribute dispatcher to None
2021-07-19 11:44:45,304 : INFO : LdaModel lifecycle event {'fname': 'models/model_rs1024k1', 'datetime': '2021-07-19T11:44:45.304116', 'gensim': '4.0.1', 'python': '3.9.6 (v3.9.6:db3ff76da1, Jun 28 2021, 11:49:53) \n[Clang 6.0 (clang-600.0.57)]', 'platform': 'macOS-10.15.7-x86_64-i386-64bit', 'event': 'loaded'}
2021-07-19 11:44:45,313 : INFO : loading LdaState object from models/model_rs1024k1.state
2021-07-19 11:44:45,352 : INFO : LdaState lifecycle event {'fname': 'models/model_rs1024k1.state', 'datetime': '2021-07-19T11:44:45.352676', 'gensim': '4.0.1', 'python': '3.9.6

11:44:45Starting evaluation of model models/model_rs1024k1


2021-07-19 11:44:45,837 : INFO : 1 batches submitted to accumulate stats from 64 documents (1275 virtual)
2021-07-19 11:44:45,854 : INFO : 2 batches submitted to accumulate stats from 128 documents (2284 virtual)
2021-07-19 11:44:45,872 : INFO : 3 batches submitted to accumulate stats from 192 documents (2853 virtual)
2021-07-19 11:44:47,254 : INFO : 5 batches submitted to accumulate stats from 320 documents (5536 virtual)
2021-07-19 11:44:47,266 : INFO : 6 batches submitted to accumulate stats from 384 documents (7420 virtual)
2021-07-19 11:44:47,359 : INFO : 7 batches submitted to accumulate stats from 448 documents (9671 virtual)
2021-07-19 11:44:47,382 : INFO : 8 batches submitted to accumulate stats from 512 documents (12615 virtual)
2021-07-19 11:44:47,412 : INFO : 9 batches submitted to accumulate stats from 576 documents (14328 virtual)
2021-07-19 11:44:47,448 : INFO : 10 batches submitted to accumulate stats from 640 documents (16710 virtual)
2021-07-19 11:44:47,571 : INFO : 1

2021-07-19 11:44:51,306 : INFO : 77 batches submitted to accumulate stats from 4928 documents (125831 virtual)
2021-07-19 11:44:51,419 : INFO : 78 batches submitted to accumulate stats from 4992 documents (127422 virtual)
2021-07-19 11:44:51,429 : INFO : 79 batches submitted to accumulate stats from 5056 documents (129413 virtual)
2021-07-19 11:44:51,464 : INFO : 80 batches submitted to accumulate stats from 5120 documents (131502 virtual)
2021-07-19 11:44:51,554 : INFO : 81 batches submitted to accumulate stats from 5184 documents (133148 virtual)
2021-07-19 11:44:51,581 : INFO : 82 batches submitted to accumulate stats from 5248 documents (134804 virtual)
2021-07-19 11:44:51,631 : INFO : 83 batches submitted to accumulate stats from 5312 documents (135753 virtual)
2021-07-19 11:44:51,712 : INFO : 84 batches submitted to accumulate stats from 5376 documents (136836 virtual)
2021-07-19 11:44:51,761 : INFO : 85 batches submitted to accumulate stats from 5440 documents (137831 virtual)
2

2021-07-19 11:44:55,211 : INFO : 154 batches submitted to accumulate stats from 9856 documents (235742 virtual)
2021-07-19 11:44:55,286 : INFO : 155 batches submitted to accumulate stats from 9920 documents (238031 virtual)
2021-07-19 11:44:55,339 : INFO : 156 batches submitted to accumulate stats from 9984 documents (239112 virtual)
2021-07-19 11:44:55,351 : INFO : 157 batches submitted to accumulate stats from 10048 documents (240020 virtual)
2021-07-19 11:44:55,397 : INFO : 158 batches submitted to accumulate stats from 10112 documents (241148 virtual)
2021-07-19 11:44:55,436 : INFO : 159 batches submitted to accumulate stats from 10176 documents (242364 virtual)
2021-07-19 11:44:55,489 : INFO : 160 batches submitted to accumulate stats from 10240 documents (243828 virtual)
2021-07-19 11:44:55,560 : INFO : 161 batches submitted to accumulate stats from 10304 documents (246652 virtual)
2021-07-19 11:44:55,593 : INFO : 162 batches submitted to accumulate stats from 10368 documents (24

2021-07-19 11:45:00,826 : INFO : 228 batches submitted to accumulate stats from 14592 documents (342265 virtual)
2021-07-19 11:45:00,854 : INFO : 229 batches submitted to accumulate stats from 14656 documents (342625 virtual)
2021-07-19 11:45:01,268 : INFO : 230 batches submitted to accumulate stats from 14720 documents (343540 virtual)
2021-07-19 11:45:01,482 : INFO : 231 batches submitted to accumulate stats from 14784 documents (344421 virtual)
2021-07-19 11:45:01,505 : INFO : 232 batches submitted to accumulate stats from 14848 documents (344857 virtual)
2021-07-19 11:45:01,592 : INFO : 233 batches submitted to accumulate stats from 14912 documents (345481 virtual)
2021-07-19 11:45:01,628 : INFO : 234 batches submitted to accumulate stats from 14976 documents (346306 virtual)
2021-07-19 11:45:01,669 : INFO : 235 batches submitted to accumulate stats from 15040 documents (347123 virtual)
2021-07-19 11:45:01,730 : INFO : 236 batches submitted to accumulate stats from 15104 documents 

2021-07-19 11:45:05,527 : INFO : 305 batches submitted to accumulate stats from 19520 documents (419701 virtual)
2021-07-19 11:45:05,625 : INFO : 306 batches submitted to accumulate stats from 19584 documents (420394 virtual)
2021-07-19 11:45:05,710 : INFO : 307 batches submitted to accumulate stats from 19648 documents (420570 virtual)
2021-07-19 11:45:05,817 : INFO : 308 batches submitted to accumulate stats from 19712 documents (421798 virtual)
2021-07-19 11:45:05,838 : INFO : 309 batches submitted to accumulate stats from 19776 documents (423068 virtual)
2021-07-19 11:45:06,003 : INFO : 310 batches submitted to accumulate stats from 19840 documents (424241 virtual)
2021-07-19 11:45:06,021 : INFO : 311 batches submitted to accumulate stats from 19904 documents (425116 virtual)
2021-07-19 11:45:06,059 : INFO : 312 batches submitted to accumulate stats from 19968 documents (426000 virtual)
2021-07-19 11:45:06,142 : INFO : 313 batches submitted to accumulate stats from 20032 documents 

2021-07-19 11:45:09,621 : INFO : 380 batches submitted to accumulate stats from 24320 documents (492353 virtual)
2021-07-19 11:45:09,789 : INFO : 384 batches submitted to accumulate stats from 24576 documents (492527 virtual)
2021-07-19 11:45:09,842 : INFO : 385 batches submitted to accumulate stats from 24640 documents (493467 virtual)
2021-07-19 11:45:09,874 : INFO : 386 batches submitted to accumulate stats from 24704 documents (494755 virtual)
2021-07-19 11:45:09,911 : INFO : 387 batches submitted to accumulate stats from 24768 documents (495796 virtual)
2021-07-19 11:45:09,922 : INFO : 388 batches submitted to accumulate stats from 24832 documents (497621 virtual)
2021-07-19 11:45:09,942 : INFO : 389 batches submitted to accumulate stats from 24896 documents (500751 virtual)
2021-07-19 11:45:10,024 : INFO : 390 batches submitted to accumulate stats from 24960 documents (505228 virtual)
2021-07-19 11:45:10,060 : INFO : 391 batches submitted to accumulate stats from 25024 documents 

2021-07-19 11:45:13,718 : INFO : 458 batches submitted to accumulate stats from 29312 documents (578067 virtual)
2021-07-19 11:45:13,735 : INFO : 459 batches submitted to accumulate stats from 29376 documents (578073 virtual)
2021-07-19 11:45:13,819 : INFO : 460 batches submitted to accumulate stats from 29440 documents (579654 virtual)
2021-07-19 11:45:13,866 : INFO : 461 batches submitted to accumulate stats from 29504 documents (581092 virtual)
2021-07-19 11:45:13,886 : INFO : 462 batches submitted to accumulate stats from 29568 documents (581707 virtual)
2021-07-19 11:45:13,935 : INFO : 463 batches submitted to accumulate stats from 29632 documents (582597 virtual)
2021-07-19 11:45:13,989 : INFO : 464 batches submitted to accumulate stats from 29696 documents (583571 virtual)
2021-07-19 11:45:14,004 : INFO : 465 batches submitted to accumulate stats from 29760 documents (583726 virtual)
2021-07-19 11:45:14,101 : INFO : 466 batches submitted to accumulate stats from 29824 documents 

2021-07-19 11:45:18,253 : INFO : 538 batches submitted to accumulate stats from 34432 documents (642304 virtual)
2021-07-19 11:45:18,325 : INFO : 539 batches submitted to accumulate stats from 34496 documents (642493 virtual)
2021-07-19 11:45:18,334 : INFO : 540 batches submitted to accumulate stats from 34560 documents (643520 virtual)
2021-07-19 11:45:18,401 : INFO : 541 batches submitted to accumulate stats from 34624 documents (643735 virtual)
2021-07-19 11:45:18,494 : INFO : 542 batches submitted to accumulate stats from 34688 documents (644440 virtual)
2021-07-19 11:45:18,552 : INFO : 543 batches submitted to accumulate stats from 34752 documents (644857 virtual)
2021-07-19 11:45:18,684 : INFO : 544 batches submitted to accumulate stats from 34816 documents (645025 virtual)
2021-07-19 11:45:18,711 : INFO : 545 batches submitted to accumulate stats from 34880 documents (645656 virtual)
2021-07-19 11:45:18,808 : INFO : 546 batches submitted to accumulate stats from 34944 documents 

2021-07-19 11:45:22,924 : INFO : 627 batches submitted to accumulate stats from 40128 documents (703280 virtual)
2021-07-19 11:45:22,948 : INFO : 628 batches submitted to accumulate stats from 40192 documents (703546 virtual)
2021-07-19 11:45:22,963 : INFO : 629 batches submitted to accumulate stats from 40256 documents (703735 virtual)
2021-07-19 11:45:23,196 : INFO : 631 batches submitted to accumulate stats from 40384 documents (704218 virtual)
2021-07-19 11:45:23,219 : INFO : 632 batches submitted to accumulate stats from 40448 documents (704500 virtual)
2021-07-19 11:45:23,261 : INFO : 633 batches submitted to accumulate stats from 40512 documents (704604 virtual)
2021-07-19 11:45:23,340 : INFO : 636 batches submitted to accumulate stats from 40704 documents (704917 virtual)
2021-07-19 11:45:23,415 : INFO : 637 batches submitted to accumulate stats from 40768 documents (705674 virtual)
2021-07-19 11:45:23,429 : INFO : 638 batches submitted to accumulate stats from 40832 documents 

2021-07-19 11:45:29,021 : INFO : 725 batches submitted to accumulate stats from 46400 documents (751973 virtual)
2021-07-19 11:45:29,313 : INFO : 726 batches submitted to accumulate stats from 46464 documents (753035 virtual)
2021-07-19 11:45:29,428 : INFO : 727 batches submitted to accumulate stats from 46528 documents (754324 virtual)
2021-07-19 11:45:29,696 : INFO : 729 batches submitted to accumulate stats from 46656 documents (754595 virtual)
2021-07-19 11:45:30,019 : INFO : 730 batches submitted to accumulate stats from 46720 documents (756121 virtual)
2021-07-19 11:45:30,061 : INFO : 731 batches submitted to accumulate stats from 46784 documents (756527 virtual)
2021-07-19 11:45:30,261 : INFO : 732 batches submitted to accumulate stats from 46848 documents (757789 virtual)
2021-07-19 11:45:30,330 : INFO : 733 batches submitted to accumulate stats from 46912 documents (759343 virtual)
2021-07-19 11:45:30,464 : INFO : 734 batches submitted to accumulate stats from 46976 documents 

2021-07-19 11:45:36,234 : INFO : 829 batches submitted to accumulate stats from 53056 documents (798292 virtual)
2021-07-19 11:45:36,297 : INFO : 830 batches submitted to accumulate stats from 53120 documents (798519 virtual)
2021-07-19 11:45:36,307 : INFO : 831 batches submitted to accumulate stats from 53184 documents (798655 virtual)
2021-07-19 11:45:36,460 : INFO : 835 batches submitted to accumulate stats from 53440 documents (798908 virtual)
2021-07-19 11:45:36,469 : INFO : 836 batches submitted to accumulate stats from 53504 documents (800195 virtual)
2021-07-19 11:45:36,548 : INFO : 838 batches submitted to accumulate stats from 53632 documents (799703 virtual)
2021-07-19 11:45:36,561 : INFO : 839 batches submitted to accumulate stats from 53696 documents (801021 virtual)
2021-07-19 11:45:36,580 : INFO : 840 batches submitted to accumulate stats from 53760 documents (805421 virtual)
2021-07-19 11:45:36,667 : INFO : 842 batches submitted to accumulate stats from 53888 documents 

2021-07-19 11:45:41,291 : INFO : 989 batches submitted to accumulate stats from 63296 documents (809316 virtual)
2021-07-19 11:45:41,347 : INFO : 990 batches submitted to accumulate stats from 63360 documents (809480 virtual)
2021-07-19 11:45:41,377 : INFO : 992 batches submitted to accumulate stats from 63488 documents (809257 virtual)
2021-07-19 11:45:41,448 : INFO : 993 batches submitted to accumulate stats from 63552 documents (809798 virtual)
2021-07-19 11:45:41,473 : INFO : 994 batches submitted to accumulate stats from 63616 documents (810967 virtual)
2021-07-19 11:45:41,505 : INFO : 995 batches submitted to accumulate stats from 63680 documents (811197 virtual)
2021-07-19 11:45:41,557 : INFO : 996 batches submitted to accumulate stats from 63744 documents (811548 virtual)
2021-07-19 11:45:41,567 : INFO : 997 batches submitted to accumulate stats from 63808 documents (812544 virtual)
2021-07-19 11:45:41,661 : INFO : 999 batches submitted to accumulate stats from 63936 documents 

2021-07-19 11:45:47,675 : INFO : using ParallelWordOccurrenceAccumulator(processes=3, batch_size=64) to estimate probabilities from sliding windows


11:45:47Starting evaluation of model models/model_rs1024k100


2021-07-19 11:45:48,064 : INFO : 1 batches submitted to accumulate stats from 64 documents (1275 virtual)
2021-07-19 11:45:48,072 : INFO : 2 batches submitted to accumulate stats from 128 documents (2284 virtual)
2021-07-19 11:45:48,090 : INFO : 3 batches submitted to accumulate stats from 192 documents (2853 virtual)
2021-07-19 11:45:49,476 : INFO : 5 batches submitted to accumulate stats from 320 documents (5536 virtual)
2021-07-19 11:45:49,489 : INFO : 6 batches submitted to accumulate stats from 384 documents (7420 virtual)
2021-07-19 11:45:51,342 : INFO : 7 batches submitted to accumulate stats from 448 documents (9671 virtual)
2021-07-19 11:45:51,428 : INFO : 8 batches submitted to accumulate stats from 512 documents (12615 virtual)
2021-07-19 11:45:51,540 : INFO : 9 batches submitted to accumulate stats from 576 documents (14328 virtual)
2021-07-19 11:45:52,632 : INFO : 10 batches submitted to accumulate stats from 640 documents (16616 virtual)
2021-07-19 11:45:53,976 : INFO : 1

2021-07-19 11:46:46,972 : INFO : 77 batches submitted to accumulate stats from 4928 documents (125619 virtual)
2021-07-19 11:46:47,491 : INFO : 78 batches submitted to accumulate stats from 4992 documents (127133 virtual)
2021-07-19 11:46:47,670 : INFO : 79 batches submitted to accumulate stats from 5056 documents (129278 virtual)
2021-07-19 11:46:49,182 : INFO : 80 batches submitted to accumulate stats from 5120 documents (131307 virtual)
2021-07-19 11:46:49,479 : INFO : 81 batches submitted to accumulate stats from 5184 documents (132851 virtual)
2021-07-19 11:46:50,385 : INFO : 82 batches submitted to accumulate stats from 5248 documents (134663 virtual)
2021-07-19 11:46:50,907 : INFO : 83 batches submitted to accumulate stats from 5312 documents (135641 virtual)
2021-07-19 11:46:51,574 : INFO : 84 batches submitted to accumulate stats from 5376 documents (136698 virtual)
2021-07-19 11:46:52,895 : INFO : 85 batches submitted to accumulate stats from 5440 documents (137698 virtual)
2

2021-07-19 11:47:46,630 : INFO : 155 batches submitted to accumulate stats from 9920 documents (237833 virtual)
2021-07-19 11:47:47,155 : INFO : 156 batches submitted to accumulate stats from 9984 documents (238949 virtual)
2021-07-19 11:47:47,333 : INFO : 157 batches submitted to accumulate stats from 10048 documents (239931 virtual)
2021-07-19 11:47:49,063 : INFO : 158 batches submitted to accumulate stats from 10112 documents (240912 virtual)
2021-07-19 11:47:49,495 : INFO : 159 batches submitted to accumulate stats from 10176 documents (242289 virtual)
2021-07-19 11:47:50,004 : INFO : 160 batches submitted to accumulate stats from 10240 documents (243704 virtual)
2021-07-19 11:47:51,527 : INFO : 161 batches submitted to accumulate stats from 10304 documents (246287 virtual)
2021-07-19 11:47:51,737 : INFO : 162 batches submitted to accumulate stats from 10368 documents (247196 virtual)
2021-07-19 11:47:52,099 : INFO : 163 batches submitted to accumulate stats from 10432 documents (2

2021-07-19 11:48:42,896 : INFO : 228 batches submitted to accumulate stats from 14592 documents (341682 virtual)
2021-07-19 11:48:43,326 : INFO : 229 batches submitted to accumulate stats from 14656 documents (342001 virtual)
2021-07-19 11:48:44,312 : INFO : 230 batches submitted to accumulate stats from 14720 documents (342884 virtual)
2021-07-19 11:48:45,482 : INFO : 231 batches submitted to accumulate stats from 14784 documents (343770 virtual)
2021-07-19 11:48:46,089 : INFO : 232 batches submitted to accumulate stats from 14848 documents (344295 virtual)
2021-07-19 11:48:46,098 : INFO : 233 batches submitted to accumulate stats from 14912 documents (344787 virtual)
2021-07-19 11:48:47,478 : INFO : 234 batches submitted to accumulate stats from 14976 documents (345687 virtual)
2021-07-19 11:48:47,513 : INFO : 235 batches submitted to accumulate stats from 15040 documents (346616 virtual)
2021-07-19 11:48:47,700 : INFO : 236 batches submitted to accumulate stats from 15104 documents 

2021-07-19 11:49:34,668 : INFO : 306 batches submitted to accumulate stats from 19584 documents (419750 virtual)
2021-07-19 11:49:35,391 : INFO : 308 batches submitted to accumulate stats from 19712 documents (420858 virtual)
2021-07-19 11:49:36,631 : INFO : 309 batches submitted to accumulate stats from 19776 documents (422259 virtual)
2021-07-19 11:49:37,379 : INFO : 310 batches submitted to accumulate stats from 19840 documents (423363 virtual)
2021-07-19 11:49:37,806 : INFO : 311 batches submitted to accumulate stats from 19904 documents (423892 virtual)
2021-07-19 11:49:38,510 : INFO : 312 batches submitted to accumulate stats from 19968 documents (424942 virtual)
2021-07-19 11:49:38,624 : INFO : 313 batches submitted to accumulate stats from 20032 documents (425677 virtual)
2021-07-19 11:49:39,743 : INFO : 314 batches submitted to accumulate stats from 20096 documents (426491 virtual)
2021-07-19 11:49:40,503 : INFO : 315 batches submitted to accumulate stats from 20160 documents 

2021-07-19 11:50:24,635 : INFO : 384 batches submitted to accumulate stats from 24576 documents (491707 virtual)
2021-07-19 11:50:25,166 : INFO : 385 batches submitted to accumulate stats from 24640 documents (492523 virtual)
2021-07-19 11:50:25,642 : INFO : 386 batches submitted to accumulate stats from 24704 documents (493359 virtual)
2021-07-19 11:50:26,509 : INFO : 387 batches submitted to accumulate stats from 24768 documents (494515 virtual)
2021-07-19 11:50:26,531 : INFO : 388 batches submitted to accumulate stats from 24832 documents (496222 virtual)
2021-07-19 11:50:26,915 : INFO : 389 batches submitted to accumulate stats from 24896 documents (499006 virtual)
2021-07-19 11:50:28,012 : INFO : 390 batches submitted to accumulate stats from 24960 documents (502813 virtual)
2021-07-19 11:50:28,399 : INFO : 391 batches submitted to accumulate stats from 25024 documents (509708 virtual)
2021-07-19 11:50:28,582 : INFO : 392 batches submitted to accumulate stats from 25088 documents 

2021-07-19 11:51:13,506 : INFO : 458 batches submitted to accumulate stats from 29312 documents (576634 virtual)
2021-07-19 11:51:14,284 : INFO : 459 batches submitted to accumulate stats from 29376 documents (576656 virtual)
2021-07-19 11:51:14,803 : INFO : 460 batches submitted to accumulate stats from 29440 documents (577583 virtual)
2021-07-19 11:51:15,429 : INFO : 461 batches submitted to accumulate stats from 29504 documents (579012 virtual)
2021-07-19 11:51:16,365 : INFO : 462 batches submitted to accumulate stats from 29568 documents (580541 virtual)
2021-07-19 11:51:16,971 : INFO : 463 batches submitted to accumulate stats from 29632 documents (581301 virtual)
2021-07-19 11:51:17,090 : INFO : 464 batches submitted to accumulate stats from 29696 documents (582062 virtual)
2021-07-19 11:51:17,891 : INFO : 465 batches submitted to accumulate stats from 29760 documents (582436 virtual)
2021-07-19 11:51:18,767 : INFO : 466 batches submitted to accumulate stats from 29824 documents 

2021-07-19 11:52:07,647 : INFO : 541 batches submitted to accumulate stats from 34624 documents (642386 virtual)
2021-07-19 11:52:08,276 : INFO : 542 batches submitted to accumulate stats from 34688 documents (643343 virtual)
2021-07-19 11:52:09,445 : INFO : 543 batches submitted to accumulate stats from 34752 documents (643481 virtual)
2021-07-19 11:52:09,827 : INFO : 544 batches submitted to accumulate stats from 34816 documents (643572 virtual)
2021-07-19 11:52:09,923 : INFO : 545 batches submitted to accumulate stats from 34880 documents (644269 virtual)
2021-07-19 11:52:12,270 : INFO : 546 batches submitted to accumulate stats from 34944 documents (644724 virtual)
2021-07-19 11:52:12,366 : INFO : 547 batches submitted to accumulate stats from 35008 documents (645459 virtual)
2021-07-19 11:52:12,583 : INFO : 548 batches submitted to accumulate stats from 35072 documents (646017 virtual)
2021-07-19 11:52:14,022 : INFO : 550 batches submitted to accumulate stats from 35200 documents 

2021-07-19 11:53:02,659 : INFO : 628 batches submitted to accumulate stats from 40192 documents (702195 virtual)
2021-07-19 11:53:03,058 : INFO : 629 batches submitted to accumulate stats from 40256 documents (702201 virtual)
2021-07-19 11:53:03,788 : INFO : 630 batches submitted to accumulate stats from 40320 documents (702445 virtual)
2021-07-19 11:53:04,328 : INFO : 631 batches submitted to accumulate stats from 40384 documents (702509 virtual)
2021-07-19 11:53:04,661 : INFO : 632 batches submitted to accumulate stats from 40448 documents (703125 virtual)
2021-07-19 11:53:05,216 : INFO : 633 batches submitted to accumulate stats from 40512 documents (703400 virtual)
2021-07-19 11:53:06,782 : INFO : 636 batches submitted to accumulate stats from 40704 documents (703271 virtual)
2021-07-19 11:53:07,527 : INFO : 637 batches submitted to accumulate stats from 40768 documents (703835 virtual)
2021-07-19 11:53:08,103 : INFO : 638 batches submitted to accumulate stats from 40832 documents 

2021-07-19 11:54:07,419 : INFO : 728 batches submitted to accumulate stats from 46592 documents (752550 virtual)
2021-07-19 11:54:08,910 : INFO : 730 batches submitted to accumulate stats from 46720 documents (753081 virtual)
2021-07-19 11:54:09,378 : INFO : 731 batches submitted to accumulate stats from 46784 documents (754781 virtual)
2021-07-19 11:54:09,690 : INFO : 732 batches submitted to accumulate stats from 46848 documents (755753 virtual)
2021-07-19 11:54:10,968 : INFO : 733 batches submitted to accumulate stats from 46912 documents (756701 virtual)
2021-07-19 11:54:11,124 : INFO : 734 batches submitted to accumulate stats from 46976 documents (757416 virtual)
2021-07-19 11:54:11,359 : INFO : 735 batches submitted to accumulate stats from 47040 documents (757966 virtual)
2021-07-19 11:54:12,849 : INFO : 736 batches submitted to accumulate stats from 47104 documents (758806 virtual)
2021-07-19 11:54:13,777 : INFO : 737 batches submitted to accumulate stats from 47168 documents 

2021-07-19 11:55:09,719 : INFO : 836 batches submitted to accumulate stats from 53504 documents (798107 virtual)
2021-07-19 11:55:11,143 : INFO : 839 batches submitted to accumulate stats from 53696 documents (797613 virtual)
2021-07-19 11:55:12,036 : INFO : 840 batches submitted to accumulate stats from 53760 documents (800106 virtual)
2021-07-19 11:55:13,018 : INFO : 841 batches submitted to accumulate stats from 53824 documents (803253 virtual)
2021-07-19 11:55:13,413 : INFO : 843 batches submitted to accumulate stats from 53952 documents (803963 virtual)
2021-07-19 11:55:14,377 : INFO : 844 batches submitted to accumulate stats from 54016 documents (804293 virtual)
2021-07-19 11:55:15,983 : INFO : 846 batches submitted to accumulate stats from 54144 documents (803990 virtual)
2021-07-19 11:55:20,055 : INFO : 852 batches submitted to accumulate stats from 54528 documents (803109 virtual)
2021-07-19 11:55:21,476 : INFO : 856 batches submitted to accumulate stats from 54784 documents 

2021-07-19 11:56:38,788 : INFO : 1000 batches submitted to accumulate stats from 64000 documents (810197 virtual)
2021-07-19 11:56:41,671 : INFO : 1004 batches submitted to accumulate stats from 64256 documents (809529 virtual)
2021-07-19 11:56:42,174 : INFO : 1006 batches submitted to accumulate stats from 64384 documents (808861 virtual)
2021-07-19 11:56:43,284 : INFO : 1007 batches submitted to accumulate stats from 64448 documents (810423 virtual)
2021-07-19 11:56:48,007 : INFO : 1018 batches submitted to accumulate stats from 65152 documents (803717 virtual)
2021-07-19 11:56:50,070 : INFO : 1023 batches submitted to accumulate stats from 65472 documents (802351 virtual)
2021-07-19 11:56:54,721 : INFO : 1031 batches submitted to accumulate stats from 65984 documents (799331 virtual)
2021-07-19 11:56:56,063 : INFO : 1034 batches submitted to accumulate stats from 66176 documents (798533 virtual)
2021-07-19 11:56:56,763 : INFO : 1035 batches submitted to accumulate stats from 66240 d

11:58:43Starting evaluation of model models/model_rs1024k121


2021-07-19 11:58:44,475 : INFO : 1 batches submitted to accumulate stats from 64 documents (1275 virtual)
2021-07-19 11:58:44,489 : INFO : 2 batches submitted to accumulate stats from 128 documents (2284 virtual)
2021-07-19 11:58:44,508 : INFO : 3 batches submitted to accumulate stats from 192 documents (2853 virtual)
2021-07-19 11:58:45,895 : INFO : 5 batches submitted to accumulate stats from 320 documents (5536 virtual)
2021-07-19 11:58:45,906 : INFO : 6 batches submitted to accumulate stats from 384 documents (7420 virtual)
2021-07-19 11:58:47,948 : INFO : 7 batches submitted to accumulate stats from 448 documents (9671 virtual)
2021-07-19 11:58:48,242 : INFO : 8 batches submitted to accumulate stats from 512 documents (12615 virtual)
2021-07-19 11:58:48,392 : INFO : 9 batches submitted to accumulate stats from 576 documents (14328 virtual)
2021-07-19 11:58:49,410 : INFO : 10 batches submitted to accumulate stats from 640 documents (16616 virtual)
2021-07-19 11:58:50,833 : INFO : 1

2021-07-19 11:59:48,036 : INFO : 77 batches submitted to accumulate stats from 4928 documents (125619 virtual)
2021-07-19 11:59:49,568 : INFO : 78 batches submitted to accumulate stats from 4992 documents (127133 virtual)
2021-07-19 11:59:49,722 : INFO : 79 batches submitted to accumulate stats from 5056 documents (129278 virtual)
2021-07-19 11:59:50,965 : INFO : 80 batches submitted to accumulate stats from 5120 documents (131307 virtual)
2021-07-19 11:59:52,206 : INFO : 81 batches submitted to accumulate stats from 5184 documents (132851 virtual)
2021-07-19 11:59:52,946 : INFO : 82 batches submitted to accumulate stats from 5248 documents (134663 virtual)
2021-07-19 11:59:53,016 : INFO : 83 batches submitted to accumulate stats from 5312 documents (135641 virtual)
2021-07-19 11:59:54,684 : INFO : 84 batches submitted to accumulate stats from 5376 documents (136698 virtual)
2021-07-19 11:59:55,755 : INFO : 85 batches submitted to accumulate stats from 5440 documents (137698 virtual)
2

2021-07-19 12:00:53,794 : INFO : 155 batches submitted to accumulate stats from 9920 documents (237833 virtual)
2021-07-19 12:00:54,473 : INFO : 156 batches submitted to accumulate stats from 9984 documents (238949 virtual)
2021-07-19 12:00:54,570 : INFO : 157 batches submitted to accumulate stats from 10048 documents (239931 virtual)
2021-07-19 12:00:55,833 : INFO : 158 batches submitted to accumulate stats from 10112 documents (240912 virtual)
2021-07-19 12:00:56,433 : INFO : 159 batches submitted to accumulate stats from 10176 documents (242289 virtual)
2021-07-19 12:00:56,644 : INFO : 160 batches submitted to accumulate stats from 10240 documents (243704 virtual)
2021-07-19 12:00:58,518 : INFO : 161 batches submitted to accumulate stats from 10304 documents (246287 virtual)
2021-07-19 12:00:58,787 : INFO : 162 batches submitted to accumulate stats from 10368 documents (247196 virtual)
2021-07-19 12:00:58,931 : INFO : 163 batches submitted to accumulate stats from 10432 documents (2

2021-07-19 12:01:51,884 : INFO : 228 batches submitted to accumulate stats from 14592 documents (341682 virtual)
2021-07-19 12:01:52,249 : INFO : 229 batches submitted to accumulate stats from 14656 documents (342001 virtual)
2021-07-19 12:01:53,079 : INFO : 230 batches submitted to accumulate stats from 14720 documents (342884 virtual)
2021-07-19 12:01:54,686 : INFO : 231 batches submitted to accumulate stats from 14784 documents (343770 virtual)
2021-07-19 12:01:55,116 : INFO : 232 batches submitted to accumulate stats from 14848 documents (344295 virtual)
2021-07-19 12:01:55,326 : INFO : 233 batches submitted to accumulate stats from 14912 documents (344787 virtual)
2021-07-19 12:01:56,734 : INFO : 234 batches submitted to accumulate stats from 14976 documents (345687 virtual)
2021-07-19 12:01:56,983 : INFO : 235 batches submitted to accumulate stats from 15040 documents (346616 virtual)
2021-07-19 12:01:57,169 : INFO : 236 batches submitted to accumulate stats from 15104 documents 

2021-07-19 12:02:49,159 : INFO : 306 batches submitted to accumulate stats from 19584 documents (419750 virtual)
2021-07-19 12:02:50,268 : INFO : 308 batches submitted to accumulate stats from 19712 documents (420858 virtual)
2021-07-19 12:02:51,215 : INFO : 309 batches submitted to accumulate stats from 19776 documents (422259 virtual)
2021-07-19 12:02:52,426 : INFO : 310 batches submitted to accumulate stats from 19840 documents (423363 virtual)
2021-07-19 12:02:52,708 : INFO : 311 batches submitted to accumulate stats from 19904 documents (423892 virtual)
2021-07-19 12:02:53,334 : INFO : 312 batches submitted to accumulate stats from 19968 documents (424942 virtual)
2021-07-19 12:02:53,871 : INFO : 313 batches submitted to accumulate stats from 20032 documents (425677 virtual)
2021-07-19 12:02:54,794 : INFO : 314 batches submitted to accumulate stats from 20096 documents (426491 virtual)
2021-07-19 12:02:55,853 : INFO : 315 batches submitted to accumulate stats from 20160 documents 

2021-07-19 12:03:46,436 : INFO : 384 batches submitted to accumulate stats from 24576 documents (491707 virtual)
2021-07-19 12:03:47,175 : INFO : 385 batches submitted to accumulate stats from 24640 documents (492523 virtual)
2021-07-19 12:03:47,228 : INFO : 386 batches submitted to accumulate stats from 24704 documents (493359 virtual)
2021-07-19 12:03:48,602 : INFO : 387 batches submitted to accumulate stats from 24768 documents (494515 virtual)
2021-07-19 12:03:48,684 : INFO : 388 batches submitted to accumulate stats from 24832 documents (496222 virtual)
2021-07-19 12:03:48,816 : INFO : 389 batches submitted to accumulate stats from 24896 documents (499006 virtual)
2021-07-19 12:03:50,329 : INFO : 390 batches submitted to accumulate stats from 24960 documents (502813 virtual)
2021-07-19 12:03:50,761 : INFO : 391 batches submitted to accumulate stats from 25024 documents (509708 virtual)
2021-07-19 12:03:50,827 : INFO : 392 batches submitted to accumulate stats from 25088 documents 

2021-07-19 12:04:41,439 : INFO : 458 batches submitted to accumulate stats from 29312 documents (576634 virtual)
2021-07-19 12:04:42,761 : INFO : 459 batches submitted to accumulate stats from 29376 documents (576656 virtual)
2021-07-19 12:04:43,055 : INFO : 460 batches submitted to accumulate stats from 29440 documents (577583 virtual)
2021-07-19 12:04:43,612 : INFO : 461 batches submitted to accumulate stats from 29504 documents (579012 virtual)
2021-07-19 12:04:44,960 : INFO : 462 batches submitted to accumulate stats from 29568 documents (580541 virtual)
2021-07-19 12:04:45,308 : INFO : 463 batches submitted to accumulate stats from 29632 documents (581301 virtual)
2021-07-19 12:04:45,456 : INFO : 464 batches submitted to accumulate stats from 29696 documents (582062 virtual)
2021-07-19 12:04:46,825 : INFO : 465 batches submitted to accumulate stats from 29760 documents (582436 virtual)
2021-07-19 12:04:47,367 : INFO : 466 batches submitted to accumulate stats from 29824 documents 

2021-07-19 12:05:39,739 : INFO : 541 batches submitted to accumulate stats from 34624 documents (642386 virtual)
2021-07-19 12:05:40,831 : INFO : 542 batches submitted to accumulate stats from 34688 documents (643343 virtual)
2021-07-19 12:05:41,759 : INFO : 543 batches submitted to accumulate stats from 34752 documents (643481 virtual)
2021-07-19 12:05:42,346 : INFO : 544 batches submitted to accumulate stats from 34816 documents (643572 virtual)
2021-07-19 12:05:42,554 : INFO : 545 batches submitted to accumulate stats from 34880 documents (644269 virtual)
2021-07-19 12:05:44,298 : INFO : 546 batches submitted to accumulate stats from 34944 documents (644724 virtual)
2021-07-19 12:05:44,372 : INFO : 547 batches submitted to accumulate stats from 35008 documents (645459 virtual)
2021-07-19 12:05:44,585 : INFO : 548 batches submitted to accumulate stats from 35072 documents (646017 virtual)
2021-07-19 12:05:46,110 : INFO : 550 batches submitted to accumulate stats from 35200 documents 

2021-07-19 12:06:40,596 : INFO : 628 batches submitted to accumulate stats from 40192 documents (702195 virtual)
2021-07-19 12:06:40,746 : INFO : 629 batches submitted to accumulate stats from 40256 documents (702201 virtual)
2021-07-19 12:06:41,227 : INFO : 630 batches submitted to accumulate stats from 40320 documents (702445 virtual)
2021-07-19 12:06:42,365 : INFO : 631 batches submitted to accumulate stats from 40384 documents (702509 virtual)
2021-07-19 12:06:42,594 : INFO : 632 batches submitted to accumulate stats from 40448 documents (703125 virtual)
2021-07-19 12:06:42,951 : INFO : 633 batches submitted to accumulate stats from 40512 documents (703400 virtual)
2021-07-19 12:06:44,809 : INFO : 636 batches submitted to accumulate stats from 40704 documents (703271 virtual)
2021-07-19 12:06:45,865 : INFO : 637 batches submitted to accumulate stats from 40768 documents (703835 virtual)
2021-07-19 12:06:46,509 : INFO : 638 batches submitted to accumulate stats from 40832 documents 

2021-07-19 12:07:43,719 : INFO : 728 batches submitted to accumulate stats from 46592 documents (752550 virtual)
2021-07-19 12:07:45,429 : INFO : 730 batches submitted to accumulate stats from 46720 documents (753081 virtual)
2021-07-19 12:07:45,928 : INFO : 731 batches submitted to accumulate stats from 46784 documents (754781 virtual)
2021-07-19 12:07:46,033 : INFO : 732 batches submitted to accumulate stats from 46848 documents (755753 virtual)
2021-07-19 12:07:47,726 : INFO : 733 batches submitted to accumulate stats from 46912 documents (756701 virtual)
2021-07-19 12:07:47,750 : INFO : 734 batches submitted to accumulate stats from 46976 documents (757416 virtual)
2021-07-19 12:07:47,869 : INFO : 735 batches submitted to accumulate stats from 47040 documents (757966 virtual)
2021-07-19 12:07:49,767 : INFO : 736 batches submitted to accumulate stats from 47104 documents (758806 virtual)
2021-07-19 12:07:50,018 : INFO : 737 batches submitted to accumulate stats from 47168 documents 

2021-07-19 12:08:52,953 : INFO : 836 batches submitted to accumulate stats from 53504 documents (798107 virtual)
2021-07-19 12:08:54,608 : INFO : 839 batches submitted to accumulate stats from 53696 documents (797613 virtual)
2021-07-19 12:08:55,158 : INFO : 840 batches submitted to accumulate stats from 53760 documents (800106 virtual)
2021-07-19 12:08:56,091 : INFO : 841 batches submitted to accumulate stats from 53824 documents (803253 virtual)
2021-07-19 12:08:57,161 : INFO : 843 batches submitted to accumulate stats from 53952 documents (803963 virtual)
2021-07-19 12:08:57,641 : INFO : 844 batches submitted to accumulate stats from 54016 documents (804293 virtual)
2021-07-19 12:08:59,981 : INFO : 846 batches submitted to accumulate stats from 54144 documents (803990 virtual)
2021-07-19 12:09:03,374 : INFO : 852 batches submitted to accumulate stats from 54528 documents (803109 virtual)
2021-07-19 12:09:05,165 : INFO : 856 batches submitted to accumulate stats from 54784 documents 

2021-07-19 12:10:27,637 : INFO : 1000 batches submitted to accumulate stats from 64000 documents (810197 virtual)
2021-07-19 12:10:30,767 : INFO : 1004 batches submitted to accumulate stats from 64256 documents (809529 virtual)
2021-07-19 12:10:31,246 : INFO : 1006 batches submitted to accumulate stats from 64384 documents (808861 virtual)
2021-07-19 12:10:32,129 : INFO : 1007 batches submitted to accumulate stats from 64448 documents (810423 virtual)
2021-07-19 12:10:37,628 : INFO : 1018 batches submitted to accumulate stats from 65152 documents (803717 virtual)
2021-07-19 12:10:40,640 : INFO : 1023 batches submitted to accumulate stats from 65472 documents (802351 virtual)
2021-07-19 12:10:45,003 : INFO : 1031 batches submitted to accumulate stats from 65984 documents (799331 virtual)
2021-07-19 12:10:46,325 : INFO : 1034 batches submitted to accumulate stats from 66176 documents (798533 virtual)
2021-07-19 12:10:47,328 : INFO : 1035 batches submitted to accumulate stats from 66240 d

12:12:46Starting evaluation of model models/model_rs1024k144


2021-07-19 12:12:46,809 : INFO : 1 batches submitted to accumulate stats from 64 documents (1275 virtual)
2021-07-19 12:12:46,819 : INFO : 2 batches submitted to accumulate stats from 128 documents (2284 virtual)
2021-07-19 12:12:46,831 : INFO : 3 batches submitted to accumulate stats from 192 documents (2853 virtual)
2021-07-19 12:12:48,222 : INFO : 5 batches submitted to accumulate stats from 320 documents (5536 virtual)
2021-07-19 12:12:48,236 : INFO : 6 batches submitted to accumulate stats from 384 documents (7420 virtual)
2021-07-19 12:12:50,541 : INFO : 7 batches submitted to accumulate stats from 448 documents (9671 virtual)
2021-07-19 12:12:50,708 : INFO : 8 batches submitted to accumulate stats from 512 documents (12615 virtual)
2021-07-19 12:12:50,888 : INFO : 9 batches submitted to accumulate stats from 576 documents (14328 virtual)
2021-07-19 12:12:52,131 : INFO : 10 batches submitted to accumulate stats from 640 documents (16616 virtual)
2021-07-19 12:12:53,765 : INFO : 1

2021-07-19 12:13:58,830 : INFO : 77 batches submitted to accumulate stats from 4928 documents (125619 virtual)
2021-07-19 12:13:59,988 : INFO : 78 batches submitted to accumulate stats from 4992 documents (127133 virtual)
2021-07-19 12:14:00,002 : INFO : 79 batches submitted to accumulate stats from 5056 documents (129278 virtual)
2021-07-19 12:14:01,722 : INFO : 80 batches submitted to accumulate stats from 5120 documents (131307 virtual)
2021-07-19 12:14:02,860 : INFO : 81 batches submitted to accumulate stats from 5184 documents (132851 virtual)
2021-07-19 12:14:03,727 : INFO : 82 batches submitted to accumulate stats from 5248 documents (134663 virtual)
2021-07-19 12:14:03,995 : INFO : 83 batches submitted to accumulate stats from 5312 documents (135641 virtual)
2021-07-19 12:14:06,005 : INFO : 84 batches submitted to accumulate stats from 5376 documents (136698 virtual)
2021-07-19 12:14:06,829 : INFO : 85 batches submitted to accumulate stats from 5440 documents (137698 virtual)
2

2021-07-19 12:15:12,914 : INFO : 155 batches submitted to accumulate stats from 9920 documents (237833 virtual)
2021-07-19 12:15:13,217 : INFO : 156 batches submitted to accumulate stats from 9984 documents (238949 virtual)
2021-07-19 12:15:14,106 : INFO : 157 batches submitted to accumulate stats from 10048 documents (239931 virtual)
2021-07-19 12:15:15,191 : INFO : 158 batches submitted to accumulate stats from 10112 documents (240912 virtual)
2021-07-19 12:15:15,351 : INFO : 159 batches submitted to accumulate stats from 10176 documents (242289 virtual)
2021-07-19 12:15:16,574 : INFO : 160 batches submitted to accumulate stats from 10240 documents (243704 virtual)
2021-07-19 12:15:18,117 : INFO : 161 batches submitted to accumulate stats from 10304 documents (246287 virtual)
2021-07-19 12:15:18,498 : INFO : 162 batches submitted to accumulate stats from 10368 documents (247196 virtual)
2021-07-19 12:15:19,226 : INFO : 163 batches submitted to accumulate stats from 10432 documents (2

2021-07-19 12:16:20,573 : INFO : 228 batches submitted to accumulate stats from 14592 documents (341682 virtual)
2021-07-19 12:16:21,132 : INFO : 229 batches submitted to accumulate stats from 14656 documents (342001 virtual)
2021-07-19 12:16:21,810 : INFO : 230 batches submitted to accumulate stats from 14720 documents (342884 virtual)
2021-07-19 12:16:23,777 : INFO : 231 batches submitted to accumulate stats from 14784 documents (343770 virtual)
2021-07-19 12:16:24,283 : INFO : 232 batches submitted to accumulate stats from 14848 documents (344295 virtual)
2021-07-19 12:16:24,581 : INFO : 233 batches submitted to accumulate stats from 14912 documents (344787 virtual)
2021-07-19 12:16:26,296 : INFO : 234 batches submitted to accumulate stats from 14976 documents (345687 virtual)
2021-07-19 12:16:26,555 : INFO : 235 batches submitted to accumulate stats from 15040 documents (346616 virtual)
2021-07-19 12:16:26,637 : INFO : 236 batches submitted to accumulate stats from 15104 documents 

2021-07-19 12:17:27,163 : INFO : 306 batches submitted to accumulate stats from 19584 documents (419750 virtual)
2021-07-19 12:17:28,239 : INFO : 308 batches submitted to accumulate stats from 19712 documents (420858 virtual)
2021-07-19 12:17:29,656 : INFO : 309 batches submitted to accumulate stats from 19776 documents (422259 virtual)
2021-07-19 12:17:30,981 : INFO : 310 batches submitted to accumulate stats from 19840 documents (423363 virtual)
2021-07-19 12:17:31,217 : INFO : 311 batches submitted to accumulate stats from 19904 documents (423892 virtual)
2021-07-19 12:17:32,227 : INFO : 312 batches submitted to accumulate stats from 19968 documents (424942 virtual)
2021-07-19 12:17:32,703 : INFO : 313 batches submitted to accumulate stats from 20032 documents (425677 virtual)
2021-07-19 12:17:33,873 : INFO : 314 batches submitted to accumulate stats from 20096 documents (426491 virtual)
2021-07-19 12:17:35,312 : INFO : 315 batches submitted to accumulate stats from 20160 documents 

2021-07-19 12:18:33,594 : INFO : 384 batches submitted to accumulate stats from 24576 documents (491707 virtual)
2021-07-19 12:18:33,752 : INFO : 385 batches submitted to accumulate stats from 24640 documents (492523 virtual)
2021-07-19 12:18:34,477 : INFO : 386 batches submitted to accumulate stats from 24704 documents (493359 virtual)
2021-07-19 12:18:35,596 : INFO : 387 batches submitted to accumulate stats from 24768 documents (494515 virtual)
2021-07-19 12:18:36,144 : INFO : 388 batches submitted to accumulate stats from 24832 documents (496222 virtual)
2021-07-19 12:18:36,226 : INFO : 389 batches submitted to accumulate stats from 24896 documents (499006 virtual)
2021-07-19 12:18:37,597 : INFO : 390 batches submitted to accumulate stats from 24960 documents (502813 virtual)
2021-07-19 12:18:38,467 : INFO : 391 batches submitted to accumulate stats from 25024 documents (509708 virtual)
2021-07-19 12:18:38,668 : INFO : 392 batches submitted to accumulate stats from 25088 documents 

2021-07-19 12:19:37,919 : INFO : 458 batches submitted to accumulate stats from 29312 documents (576634 virtual)
2021-07-19 12:19:39,065 : INFO : 459 batches submitted to accumulate stats from 29376 documents (576656 virtual)
2021-07-19 12:19:39,438 : INFO : 460 batches submitted to accumulate stats from 29440 documents (577583 virtual)
2021-07-19 12:19:40,425 : INFO : 461 batches submitted to accumulate stats from 29504 documents (579012 virtual)
2021-07-19 12:19:41,907 : INFO : 462 batches submitted to accumulate stats from 29568 documents (580541 virtual)
2021-07-19 12:19:42,305 : INFO : 463 batches submitted to accumulate stats from 29632 documents (581301 virtual)
2021-07-19 12:19:42,522 : INFO : 464 batches submitted to accumulate stats from 29696 documents (582062 virtual)
2021-07-19 12:19:43,896 : INFO : 465 batches submitted to accumulate stats from 29760 documents (582436 virtual)
2021-07-19 12:19:44,704 : INFO : 466 batches submitted to accumulate stats from 29824 documents 

2021-07-19 12:20:44,417 : INFO : 541 batches submitted to accumulate stats from 34624 documents (642386 virtual)
2021-07-19 12:20:45,535 : INFO : 542 batches submitted to accumulate stats from 34688 documents (643343 virtual)
2021-07-19 12:20:46,872 : INFO : 543 batches submitted to accumulate stats from 34752 documents (643481 virtual)
2021-07-19 12:20:47,044 : INFO : 544 batches submitted to accumulate stats from 34816 documents (643572 virtual)
2021-07-19 12:20:47,498 : INFO : 545 batches submitted to accumulate stats from 34880 documents (644269 virtual)
2021-07-19 12:20:49,324 : INFO : 546 batches submitted to accumulate stats from 34944 documents (644724 virtual)
2021-07-19 12:20:49,747 : INFO : 547 batches submitted to accumulate stats from 35008 documents (645459 virtual)
2021-07-19 12:20:50,010 : INFO : 548 batches submitted to accumulate stats from 35072 documents (646017 virtual)
2021-07-19 12:20:51,784 : INFO : 550 batches submitted to accumulate stats from 35200 documents 

2021-07-19 12:21:54,024 : INFO : 628 batches submitted to accumulate stats from 40192 documents (702195 virtual)
2021-07-19 12:21:54,526 : INFO : 629 batches submitted to accumulate stats from 40256 documents (702201 virtual)
2021-07-19 12:21:55,160 : INFO : 630 batches submitted to accumulate stats from 40320 documents (702445 virtual)
2021-07-19 12:21:56,066 : INFO : 631 batches submitted to accumulate stats from 40384 documents (702509 virtual)
2021-07-19 12:21:56,638 : INFO : 632 batches submitted to accumulate stats from 40448 documents (703125 virtual)
2021-07-19 12:21:57,095 : INFO : 633 batches submitted to accumulate stats from 40512 documents (703400 virtual)
2021-07-19 12:21:59,257 : INFO : 636 batches submitted to accumulate stats from 40704 documents (703271 virtual)
2021-07-19 12:22:00,073 : INFO : 637 batches submitted to accumulate stats from 40768 documents (703835 virtual)
2021-07-19 12:22:01,109 : INFO : 638 batches submitted to accumulate stats from 40832 documents 

2021-07-19 12:23:08,497 : INFO : 728 batches submitted to accumulate stats from 46592 documents (752550 virtual)
2021-07-19 12:23:10,441 : INFO : 730 batches submitted to accumulate stats from 46720 documents (753081 virtual)
2021-07-19 12:23:10,601 : INFO : 731 batches submitted to accumulate stats from 46784 documents (754781 virtual)
2021-07-19 12:23:11,234 : INFO : 732 batches submitted to accumulate stats from 46848 documents (755753 virtual)
2021-07-19 12:23:12,480 : INFO : 733 batches submitted to accumulate stats from 46912 documents (756701 virtual)
2021-07-19 12:23:13,207 : INFO : 734 batches submitted to accumulate stats from 46976 documents (757416 virtual)
2021-07-19 12:23:13,343 : INFO : 735 batches submitted to accumulate stats from 47040 documents (757966 virtual)
2021-07-19 12:23:14,799 : INFO : 736 batches submitted to accumulate stats from 47104 documents (758806 virtual)
2021-07-19 12:23:15,959 : INFO : 737 batches submitted to accumulate stats from 47168 documents 

2021-07-19 12:24:27,965 : INFO : 836 batches submitted to accumulate stats from 53504 documents (798107 virtual)
2021-07-19 12:24:29,957 : INFO : 839 batches submitted to accumulate stats from 53696 documents (797613 virtual)
2021-07-19 12:24:29,981 : INFO : 840 batches submitted to accumulate stats from 53760 documents (800106 virtual)
2021-07-19 12:24:31,735 : INFO : 841 batches submitted to accumulate stats from 53824 documents (803253 virtual)
2021-07-19 12:24:32,933 : INFO : 843 batches submitted to accumulate stats from 53952 documents (803963 virtual)
2021-07-19 12:24:33,694 : INFO : 844 batches submitted to accumulate stats from 54016 documents (804293 virtual)
2021-07-19 12:24:35,915 : INFO : 846 batches submitted to accumulate stats from 54144 documents (803990 virtual)
2021-07-19 12:24:40,034 : INFO : 852 batches submitted to accumulate stats from 54528 documents (803109 virtual)
2021-07-19 12:24:42,060 : INFO : 856 batches submitted to accumulate stats from 54784 documents 

2021-07-19 12:26:18,245 : INFO : 1000 batches submitted to accumulate stats from 64000 documents (810197 virtual)
2021-07-19 12:26:21,892 : INFO : 1004 batches submitted to accumulate stats from 64256 documents (809529 virtual)
2021-07-19 12:26:22,471 : INFO : 1006 batches submitted to accumulate stats from 64384 documents (808861 virtual)
2021-07-19 12:26:23,473 : INFO : 1007 batches submitted to accumulate stats from 64448 documents (810423 virtual)
2021-07-19 12:26:30,112 : INFO : 1018 batches submitted to accumulate stats from 65152 documents (803717 virtual)
2021-07-19 12:26:32,931 : INFO : 1023 batches submitted to accumulate stats from 65472 documents (802351 virtual)
2021-07-19 12:26:37,798 : INFO : 1031 batches submitted to accumulate stats from 65984 documents (799331 virtual)
2021-07-19 12:26:39,390 : INFO : 1034 batches submitted to accumulate stats from 66176 documents (798533 virtual)
2021-07-19 12:26:40,989 : INFO : 1035 batches submitted to accumulate stats from 66240 d

12:29:00Starting evaluation of model models/model_rs1024k16


2021-07-19 12:29:01,238 : INFO : 1 batches submitted to accumulate stats from 64 documents (1275 virtual)
2021-07-19 12:29:01,246 : INFO : 2 batches submitted to accumulate stats from 128 documents (2284 virtual)
2021-07-19 12:29:01,258 : INFO : 3 batches submitted to accumulate stats from 192 documents (2853 virtual)
2021-07-19 12:29:02,765 : INFO : 5 batches submitted to accumulate stats from 320 documents (5536 virtual)
2021-07-19 12:29:02,772 : INFO : 6 batches submitted to accumulate stats from 384 documents (7420 virtual)
2021-07-19 12:29:03,148 : INFO : 7 batches submitted to accumulate stats from 448 documents (9671 virtual)
2021-07-19 12:29:03,198 : INFO : 8 batches submitted to accumulate stats from 512 documents (12615 virtual)
2021-07-19 12:29:03,224 : INFO : 9 batches submitted to accumulate stats from 576 documents (14328 virtual)
2021-07-19 12:29:03,495 : INFO : 10 batches submitted to accumulate stats from 640 documents (16616 virtual)
2021-07-19 12:29:03,743 : INFO : 1

2021-07-19 12:29:15,258 : INFO : 77 batches submitted to accumulate stats from 4928 documents (125619 virtual)
2021-07-19 12:29:15,405 : INFO : 78 batches submitted to accumulate stats from 4992 documents (127133 virtual)
2021-07-19 12:29:15,544 : INFO : 79 batches submitted to accumulate stats from 5056 documents (129278 virtual)
2021-07-19 12:29:15,786 : INFO : 80 batches submitted to accumulate stats from 5120 documents (131307 virtual)
2021-07-19 12:29:15,890 : INFO : 81 batches submitted to accumulate stats from 5184 documents (132851 virtual)
2021-07-19 12:29:16,198 : INFO : 82 batches submitted to accumulate stats from 5248 documents (134663 virtual)
2021-07-19 12:29:16,264 : INFO : 83 batches submitted to accumulate stats from 5312 documents (135641 virtual)
2021-07-19 12:29:16,398 : INFO : 84 batches submitted to accumulate stats from 5376 documents (136698 virtual)
2021-07-19 12:29:16,805 : INFO : 85 batches submitted to accumulate stats from 5440 documents (137698 virtual)
2

2021-07-19 12:29:28,529 : INFO : 155 batches submitted to accumulate stats from 9920 documents (237833 virtual)
2021-07-19 12:29:28,597 : INFO : 156 batches submitted to accumulate stats from 9984 documents (238949 virtual)
2021-07-19 12:29:28,607 : INFO : 157 batches submitted to accumulate stats from 10048 documents (239931 virtual)
2021-07-19 12:29:28,934 : INFO : 158 batches submitted to accumulate stats from 10112 documents (240912 virtual)
2021-07-19 12:29:28,945 : INFO : 159 batches submitted to accumulate stats from 10176 documents (242289 virtual)
2021-07-19 12:29:29,038 : INFO : 160 batches submitted to accumulate stats from 10240 documents (243704 virtual)
2021-07-19 12:29:29,398 : INFO : 161 batches submitted to accumulate stats from 10304 documents (246287 virtual)
2021-07-19 12:29:29,413 : INFO : 162 batches submitted to accumulate stats from 10368 documents (247196 virtual)
2021-07-19 12:29:29,501 : INFO : 163 batches submitted to accumulate stats from 10432 documents (2

2021-07-19 12:29:41,547 : INFO : 228 batches submitted to accumulate stats from 14592 documents (341682 virtual)
2021-07-19 12:29:41,783 : INFO : 229 batches submitted to accumulate stats from 14656 documents (342001 virtual)
2021-07-19 12:29:41,948 : INFO : 230 batches submitted to accumulate stats from 14720 documents (342884 virtual)
2021-07-19 12:29:42,131 : INFO : 231 batches submitted to accumulate stats from 14784 documents (343770 virtual)
2021-07-19 12:29:42,353 : INFO : 232 batches submitted to accumulate stats from 14848 documents (344295 virtual)
2021-07-19 12:29:42,418 : INFO : 233 batches submitted to accumulate stats from 14912 documents (344787 virtual)
2021-07-19 12:29:42,623 : INFO : 234 batches submitted to accumulate stats from 14976 documents (345687 virtual)
2021-07-19 12:29:42,700 : INFO : 235 batches submitted to accumulate stats from 15040 documents (346616 virtual)
2021-07-19 12:29:42,821 : INFO : 236 batches submitted to accumulate stats from 15104 documents 

2021-07-19 12:29:53,375 : INFO : 306 batches submitted to accumulate stats from 19584 documents (419750 virtual)
2021-07-19 12:29:53,651 : INFO : 308 batches submitted to accumulate stats from 19712 documents (420858 virtual)
2021-07-19 12:29:53,845 : INFO : 309 batches submitted to accumulate stats from 19776 documents (422259 virtual)
2021-07-19 12:29:54,002 : INFO : 310 batches submitted to accumulate stats from 19840 documents (423363 virtual)
2021-07-19 12:29:54,088 : INFO : 311 batches submitted to accumulate stats from 19904 documents (423892 virtual)
2021-07-19 12:29:54,276 : INFO : 312 batches submitted to accumulate stats from 19968 documents (424942 virtual)
2021-07-19 12:29:54,315 : INFO : 313 batches submitted to accumulate stats from 20032 documents (425677 virtual)
2021-07-19 12:29:54,566 : INFO : 314 batches submitted to accumulate stats from 20096 documents (426491 virtual)
2021-07-19 12:29:54,759 : INFO : 315 batches submitted to accumulate stats from 20160 documents 

2021-07-19 12:30:05,146 : INFO : 384 batches submitted to accumulate stats from 24576 documents (491707 virtual)
2021-07-19 12:30:05,198 : INFO : 385 batches submitted to accumulate stats from 24640 documents (492523 virtual)
2021-07-19 12:30:05,476 : INFO : 386 batches submitted to accumulate stats from 24704 documents (493359 virtual)
2021-07-19 12:30:05,494 : INFO : 387 batches submitted to accumulate stats from 24768 documents (494515 virtual)
2021-07-19 12:30:05,633 : INFO : 388 batches submitted to accumulate stats from 24832 documents (496222 virtual)
2021-07-19 12:30:05,741 : INFO : 389 batches submitted to accumulate stats from 24896 documents (499006 virtual)
2021-07-19 12:30:05,868 : INFO : 390 batches submitted to accumulate stats from 24960 documents (502813 virtual)
2021-07-19 12:30:06,037 : INFO : 391 batches submitted to accumulate stats from 25024 documents (509708 virtual)
2021-07-19 12:30:06,136 : INFO : 392 batches submitted to accumulate stats from 25088 documents 

2021-07-19 12:30:16,345 : INFO : 458 batches submitted to accumulate stats from 29312 documents (576634 virtual)
2021-07-19 12:30:16,611 : INFO : 459 batches submitted to accumulate stats from 29376 documents (576656 virtual)
2021-07-19 12:30:16,752 : INFO : 460 batches submitted to accumulate stats from 29440 documents (577583 virtual)
2021-07-19 12:30:16,812 : INFO : 461 batches submitted to accumulate stats from 29504 documents (579012 virtual)
2021-07-19 12:30:17,102 : INFO : 462 batches submitted to accumulate stats from 29568 documents (580541 virtual)
2021-07-19 12:30:17,182 : INFO : 463 batches submitted to accumulate stats from 29632 documents (581301 virtual)
2021-07-19 12:30:17,227 : INFO : 464 batches submitted to accumulate stats from 29696 documents (582062 virtual)
2021-07-19 12:30:17,461 : INFO : 465 batches submitted to accumulate stats from 29760 documents (582436 virtual)
2021-07-19 12:30:17,616 : INFO : 466 batches submitted to accumulate stats from 29824 documents 

2021-07-19 12:30:28,194 : INFO : 541 batches submitted to accumulate stats from 34624 documents (642386 virtual)
2021-07-19 12:30:28,314 : INFO : 542 batches submitted to accumulate stats from 34688 documents (643343 virtual)
2021-07-19 12:30:28,545 : INFO : 543 batches submitted to accumulate stats from 34752 documents (643481 virtual)
2021-07-19 12:30:28,655 : INFO : 544 batches submitted to accumulate stats from 34816 documents (643572 virtual)
2021-07-19 12:30:28,676 : INFO : 545 batches submitted to accumulate stats from 34880 documents (644269 virtual)
2021-07-19 12:30:28,987 : INFO : 546 batches submitted to accumulate stats from 34944 documents (644724 virtual)
2021-07-19 12:30:29,088 : INFO : 547 batches submitted to accumulate stats from 35008 documents (645459 virtual)
2021-07-19 12:30:29,098 : INFO : 548 batches submitted to accumulate stats from 35072 documents (646017 virtual)
2021-07-19 12:30:29,425 : INFO : 550 batches submitted to accumulate stats from 35200 documents 

2021-07-19 12:30:40,445 : INFO : 628 batches submitted to accumulate stats from 40192 documents (702195 virtual)
2021-07-19 12:30:40,604 : INFO : 629 batches submitted to accumulate stats from 40256 documents (702201 virtual)
2021-07-19 12:30:40,629 : INFO : 630 batches submitted to accumulate stats from 40320 documents (702445 virtual)
2021-07-19 12:30:40,838 : INFO : 631 batches submitted to accumulate stats from 40384 documents (702509 virtual)
2021-07-19 12:30:40,960 : INFO : 632 batches submitted to accumulate stats from 40448 documents (703125 virtual)
2021-07-19 12:30:40,993 : INFO : 633 batches submitted to accumulate stats from 40512 documents (703400 virtual)
2021-07-19 12:30:41,335 : INFO : 636 batches submitted to accumulate stats from 40704 documents (703271 virtual)
2021-07-19 12:30:41,590 : INFO : 637 batches submitted to accumulate stats from 40768 documents (703835 virtual)
2021-07-19 12:30:41,713 : INFO : 638 batches submitted to accumulate stats from 40832 documents 

2021-07-19 12:30:53,247 : INFO : 728 batches submitted to accumulate stats from 46592 documents (752550 virtual)
2021-07-19 12:30:53,569 : INFO : 730 batches submitted to accumulate stats from 46720 documents (753081 virtual)
2021-07-19 12:30:53,705 : INFO : 731 batches submitted to accumulate stats from 46784 documents (754781 virtual)
2021-07-19 12:30:53,732 : INFO : 732 batches submitted to accumulate stats from 46848 documents (755753 virtual)
2021-07-19 12:30:54,025 : INFO : 733 batches submitted to accumulate stats from 46912 documents (756701 virtual)
2021-07-19 12:30:54,059 : INFO : 734 batches submitted to accumulate stats from 46976 documents (757416 virtual)
2021-07-19 12:30:54,124 : INFO : 735 batches submitted to accumulate stats from 47040 documents (757966 virtual)
2021-07-19 12:30:54,433 : INFO : 736 batches submitted to accumulate stats from 47104 documents (758806 virtual)
2021-07-19 12:30:54,550 : INFO : 737 batches submitted to accumulate stats from 47168 documents 

2021-07-19 12:31:06,914 : INFO : 836 batches submitted to accumulate stats from 53504 documents (798107 virtual)
2021-07-19 12:31:07,241 : INFO : 839 batches submitted to accumulate stats from 53696 documents (797613 virtual)
2021-07-19 12:31:07,307 : INFO : 840 batches submitted to accumulate stats from 53760 documents (800106 virtual)
2021-07-19 12:31:07,549 : INFO : 841 batches submitted to accumulate stats from 53824 documents (803253 virtual)
2021-07-19 12:31:07,735 : INFO : 843 batches submitted to accumulate stats from 53952 documents (803963 virtual)
2021-07-19 12:31:07,841 : INFO : 844 batches submitted to accumulate stats from 54016 documents (804293 virtual)
2021-07-19 12:31:08,204 : INFO : 846 batches submitted to accumulate stats from 54144 documents (803990 virtual)
2021-07-19 12:31:08,888 : INFO : 852 batches submitted to accumulate stats from 54528 documents (803109 virtual)
2021-07-19 12:31:09,226 : INFO : 856 batches submitted to accumulate stats from 54784 documents 

2021-07-19 12:31:25,296 : INFO : 1000 batches submitted to accumulate stats from 64000 documents (810197 virtual)
2021-07-19 12:31:25,770 : INFO : 1004 batches submitted to accumulate stats from 64256 documents (809529 virtual)
2021-07-19 12:31:25,941 : INFO : 1006 batches submitted to accumulate stats from 64384 documents (808861 virtual)
2021-07-19 12:31:26,077 : INFO : 1007 batches submitted to accumulate stats from 64448 documents (810423 virtual)
2021-07-19 12:31:27,126 : INFO : 1018 batches submitted to accumulate stats from 65152 documents (803717 virtual)
2021-07-19 12:31:27,546 : INFO : 1023 batches submitted to accumulate stats from 65472 documents (802351 virtual)
2021-07-19 12:31:28,334 : INFO : 1031 batches submitted to accumulate stats from 65984 documents (799331 virtual)
2021-07-19 12:31:28,579 : INFO : 1034 batches submitted to accumulate stats from 66176 documents (798533 virtual)
2021-07-19 12:31:28,790 : INFO : 1035 batches submitted to accumulate stats from 66240 d

12:31:48Starting evaluation of model models/model_rs1024k169


2021-07-19 12:31:49,451 : INFO : 1 batches submitted to accumulate stats from 64 documents (1275 virtual)
2021-07-19 12:31:49,469 : INFO : 2 batches submitted to accumulate stats from 128 documents (2284 virtual)
2021-07-19 12:31:49,487 : INFO : 3 batches submitted to accumulate stats from 192 documents (2853 virtual)
2021-07-19 12:31:50,570 : INFO : 5 batches submitted to accumulate stats from 320 documents (5536 virtual)
2021-07-19 12:31:50,586 : INFO : 6 batches submitted to accumulate stats from 384 documents (7420 virtual)
2021-07-19 12:31:53,286 : INFO : 7 batches submitted to accumulate stats from 448 documents (9671 virtual)
2021-07-19 12:31:53,340 : INFO : 8 batches submitted to accumulate stats from 512 documents (12615 virtual)
2021-07-19 12:31:53,640 : INFO : 9 batches submitted to accumulate stats from 576 documents (14328 virtual)
2021-07-19 12:31:55,255 : INFO : 10 batches submitted to accumulate stats from 640 documents (16616 virtual)
2021-07-19 12:31:56,946 : INFO : 1

2021-07-19 12:33:14,268 : INFO : 77 batches submitted to accumulate stats from 4928 documents (125619 virtual)
2021-07-19 12:33:15,587 : INFO : 78 batches submitted to accumulate stats from 4992 documents (127133 virtual)
2021-07-19 12:33:15,820 : INFO : 79 batches submitted to accumulate stats from 5056 documents (129278 virtual)
2021-07-19 12:33:17,550 : INFO : 80 batches submitted to accumulate stats from 5120 documents (131307 virtual)
2021-07-19 12:33:18,596 : INFO : 81 batches submitted to accumulate stats from 5184 documents (132851 virtual)
2021-07-19 12:33:19,826 : INFO : 82 batches submitted to accumulate stats from 5248 documents (134663 virtual)
2021-07-19 12:33:20,257 : INFO : 83 batches submitted to accumulate stats from 5312 documents (135641 virtual)
2021-07-19 12:33:21,822 : INFO : 84 batches submitted to accumulate stats from 5376 documents (136698 virtual)
2021-07-19 12:33:23,458 : INFO : 85 batches submitted to accumulate stats from 5440 documents (137698 virtual)
2

2021-07-19 12:34:38,864 : INFO : 155 batches submitted to accumulate stats from 9920 documents (237833 virtual)
2021-07-19 12:34:39,653 : INFO : 156 batches submitted to accumulate stats from 9984 documents (238949 virtual)
2021-07-19 12:34:39,753 : INFO : 157 batches submitted to accumulate stats from 10048 documents (239931 virtual)
2021-07-19 12:34:41,716 : INFO : 158 batches submitted to accumulate stats from 10112 documents (240912 virtual)
2021-07-19 12:34:42,013 : INFO : 159 batches submitted to accumulate stats from 10176 documents (242289 virtual)
2021-07-19 12:34:42,683 : INFO : 160 batches submitted to accumulate stats from 10240 documents (243704 virtual)
2021-07-19 12:34:45,342 : INFO : 161 batches submitted to accumulate stats from 10304 documents (246287 virtual)
2021-07-19 12:34:45,671 : INFO : 162 batches submitted to accumulate stats from 10368 documents (247196 virtual)
2021-07-19 12:34:46,109 : INFO : 163 batches submitted to accumulate stats from 10432 documents (2

2021-07-19 12:35:59,043 : INFO : 228 batches submitted to accumulate stats from 14592 documents (341682 virtual)
2021-07-19 12:35:59,463 : INFO : 229 batches submitted to accumulate stats from 14656 documents (342001 virtual)
2021-07-19 12:36:00,720 : INFO : 230 batches submitted to accumulate stats from 14720 documents (342884 virtual)
2021-07-19 12:36:02,881 : INFO : 231 batches submitted to accumulate stats from 14784 documents (343770 virtual)
2021-07-19 12:36:03,303 : INFO : 232 batches submitted to accumulate stats from 14848 documents (344295 virtual)
2021-07-19 12:36:03,505 : INFO : 233 batches submitted to accumulate stats from 14912 documents (344787 virtual)
2021-07-19 12:36:05,535 : INFO : 234 batches submitted to accumulate stats from 14976 documents (345687 virtual)
2021-07-19 12:36:05,918 : INFO : 235 batches submitted to accumulate stats from 15040 documents (346616 virtual)
2021-07-19 12:36:06,259 : INFO : 236 batches submitted to accumulate stats from 15104 documents 

2021-07-19 12:37:15,418 : INFO : 306 batches submitted to accumulate stats from 19584 documents (419750 virtual)
2021-07-19 12:37:16,343 : INFO : 308 batches submitted to accumulate stats from 19712 documents (420858 virtual)
2021-07-19 12:37:18,518 : INFO : 309 batches submitted to accumulate stats from 19776 documents (422259 virtual)
2021-07-19 12:37:19,316 : INFO : 310 batches submitted to accumulate stats from 19840 documents (423363 virtual)
2021-07-19 12:37:19,917 : INFO : 311 batches submitted to accumulate stats from 19904 documents (423892 virtual)
2021-07-19 12:37:21,163 : INFO : 312 batches submitted to accumulate stats from 19968 documents (424942 virtual)
2021-07-19 12:37:21,812 : INFO : 313 batches submitted to accumulate stats from 20032 documents (425677 virtual)
2021-07-19 12:37:22,836 : INFO : 314 batches submitted to accumulate stats from 20096 documents (426491 virtual)
2021-07-19 12:37:24,539 : INFO : 315 batches submitted to accumulate stats from 20160 documents 

2021-07-19 12:38:30,678 : INFO : 384 batches submitted to accumulate stats from 24576 documents (491707 virtual)
2021-07-19 12:38:31,624 : INFO : 385 batches submitted to accumulate stats from 24640 documents (492523 virtual)
2021-07-19 12:38:32,002 : INFO : 386 batches submitted to accumulate stats from 24704 documents (493359 virtual)
2021-07-19 12:38:33,680 : INFO : 387 batches submitted to accumulate stats from 24768 documents (494515 virtual)
2021-07-19 12:38:33,734 : INFO : 388 batches submitted to accumulate stats from 24832 documents (496222 virtual)
2021-07-19 12:38:33,890 : INFO : 389 batches submitted to accumulate stats from 24896 documents (499006 virtual)
2021-07-19 12:38:35,992 : INFO : 390 batches submitted to accumulate stats from 24960 documents (502813 virtual)
2021-07-19 12:38:36,400 : INFO : 391 batches submitted to accumulate stats from 25024 documents (509708 virtual)
2021-07-19 12:38:36,662 : INFO : 392 batches submitted to accumulate stats from 25088 documents 

2021-07-19 12:39:47,307 : INFO : 458 batches submitted to accumulate stats from 29312 documents (576634 virtual)
2021-07-19 12:39:48,780 : INFO : 459 batches submitted to accumulate stats from 29376 documents (576656 virtual)
2021-07-19 12:39:49,788 : INFO : 460 batches submitted to accumulate stats from 29440 documents (577583 virtual)
2021-07-19 12:39:50,202 : INFO : 461 batches submitted to accumulate stats from 29504 documents (579012 virtual)
2021-07-19 12:39:51,845 : INFO : 462 batches submitted to accumulate stats from 29568 documents (580541 virtual)
2021-07-19 12:39:52,662 : INFO : 463 batches submitted to accumulate stats from 29632 documents (581301 virtual)
2021-07-19 12:39:52,811 : INFO : 464 batches submitted to accumulate stats from 29696 documents (582062 virtual)
2021-07-19 12:39:54,228 : INFO : 465 batches submitted to accumulate stats from 29760 documents (582436 virtual)
2021-07-19 12:39:55,267 : INFO : 466 batches submitted to accumulate stats from 29824 documents 

2021-07-19 12:41:07,627 : INFO : 541 batches submitted to accumulate stats from 34624 documents (642386 virtual)
2021-07-19 12:41:08,946 : INFO : 542 batches submitted to accumulate stats from 34688 documents (643343 virtual)
2021-07-19 12:41:10,643 : INFO : 543 batches submitted to accumulate stats from 34752 documents (643481 virtual)
2021-07-19 12:41:10,916 : INFO : 544 batches submitted to accumulate stats from 34816 documents (643572 virtual)
2021-07-19 12:41:11,149 : INFO : 545 batches submitted to accumulate stats from 34880 documents (644269 virtual)
2021-07-19 12:41:14,228 : INFO : 546 batches submitted to accumulate stats from 34944 documents (644724 virtual)
2021-07-19 12:41:14,830 : INFO : 547 batches submitted to accumulate stats from 35008 documents (645459 virtual)
2021-07-19 12:41:14,903 : INFO : 548 batches submitted to accumulate stats from 35072 documents (646017 virtual)
2021-07-19 12:41:17,708 : INFO : 550 batches submitted to accumulate stats from 35200 documents 

2021-07-19 12:42:29,500 : INFO : 628 batches submitted to accumulate stats from 40192 documents (702195 virtual)
2021-07-19 12:42:29,982 : INFO : 629 batches submitted to accumulate stats from 40256 documents (702201 virtual)
2021-07-19 12:42:30,711 : INFO : 630 batches submitted to accumulate stats from 40320 documents (702445 virtual)
2021-07-19 12:42:31,854 : INFO : 631 batches submitted to accumulate stats from 40384 documents (702509 virtual)
2021-07-19 12:42:32,370 : INFO : 632 batches submitted to accumulate stats from 40448 documents (703125 virtual)
2021-07-19 12:42:32,925 : INFO : 633 batches submitted to accumulate stats from 40512 documents (703400 virtual)
2021-07-19 12:42:35,328 : INFO : 636 batches submitted to accumulate stats from 40704 documents (703271 virtual)
2021-07-19 12:42:36,540 : INFO : 637 batches submitted to accumulate stats from 40768 documents (703835 virtual)
2021-07-19 12:42:37,548 : INFO : 638 batches submitted to accumulate stats from 40832 documents 

2021-07-19 12:43:54,008 : INFO : 728 batches submitted to accumulate stats from 46592 documents (752550 virtual)
2021-07-19 12:43:56,336 : INFO : 730 batches submitted to accumulate stats from 46720 documents (753081 virtual)
2021-07-19 12:43:56,650 : INFO : 731 batches submitted to accumulate stats from 46784 documents (754781 virtual)
2021-07-19 12:43:57,453 : INFO : 732 batches submitted to accumulate stats from 46848 documents (755753 virtual)
2021-07-19 12:43:58,845 : INFO : 733 batches submitted to accumulate stats from 46912 documents (756701 virtual)
2021-07-19 12:43:59,859 : INFO : 734 batches submitted to accumulate stats from 46976 documents (757416 virtual)
2021-07-19 12:44:00,011 : INFO : 735 batches submitted to accumulate stats from 47040 documents (757966 virtual)
2021-07-19 12:44:01,470 : INFO : 736 batches submitted to accumulate stats from 47104 documents (758806 virtual)
2021-07-19 12:44:03,059 : INFO : 737 batches submitted to accumulate stats from 47168 documents 

2021-07-19 12:45:24,768 : INFO : 836 batches submitted to accumulate stats from 53504 documents (798107 virtual)
2021-07-19 12:45:28,115 : INFO : 839 batches submitted to accumulate stats from 53696 documents (797613 virtual)
2021-07-19 12:45:29,233 : INFO : 840 batches submitted to accumulate stats from 53760 documents (800106 virtual)
2021-07-19 12:45:31,452 : INFO : 841 batches submitted to accumulate stats from 53824 documents (803253 virtual)


Save the preprocessed data, corpus and dictionary for later exploration  

In [None]:
# Persist the three artefacts produced above so that a later session can
# explore the results without re-running preprocessing.

# 1) The dataframe, written as CSV (default options, so the index is included).
df.to_csv(path_or_buf='data/data_for_interpretation.csv')

# 2) The corpus object, serialised with pickle into a binary file.
with open('models/corpus.pkl', mode='wb') as corpus_file:
    pickle.dump(corpus, file=corpus_file)

# 3) The dictionary, stored through its own save() method.
dictionary.save('models/dictionary')