# Notebook to extract hidden-states and attention heads activations from LSTM model predictions

In [24]:
import os
import glob
import torch
import gc
import numpy as np
import pandas as pd
from tqdm import tqdm
from model import LSTMExtractor
from sklearn.preprocessing import StandardScaler
from numpy import linalg as la
from tokenizer import tokenize, preprocess
from data import Dictionary, Corpus
from lstm_utils import read_yaml, save_yaml, batchify_text_with_memory_size, load

In [25]:
def check_folder(path):
    """Create adequate folders if necessary."""
    try:
        if not os.path.isdir(path):
            check_folder(os.path.dirname(path))
            os.mkdir(path)
    except:
        pass

In [26]:
def transform(activations, path, name, run_index, n_layers_hidden=1, hidden_size=300):
    """activations - surprisal - entropy
    """
    assert activations.values.shape[1] == ((n_layers_hidden) * hidden_size + 2)
    indexes = [[index*hidden_size, (index+1)*hidden_size] for index in range(n_layers_hidden)]
    for order in [2]:
        matrices = []
        for index in indexes:
            matrix = activations.values[:, index[0]:index[1]]
            #with_std = True if order=='std' else False
            #scaler = StandardScaler(with_mean=True, with_std=with_std)
            #scaler.fit(matrix)
            #matrix = scaler.transform(matrix)
            if order is not None and order != 'std':
                matrix = matrix / np.mean(la.norm(matrix, ord=order, axis=1))
            matrices.append(matrix)
        matrices = np.hstack(matrices)
        print(matrices.shape)
        matrices = np.hstack([matrices, activations.values[:, -2:]])
        print(matrices.shape)
        new_data = pd.DataFrame(matrices, columns=activations.columns)
        new_path = path + '_norm-' + str(order).replace('np.', '')
        check_folder(new_path)
        new_data.to_csv(os.path.join(new_path, name + '_run{}.csv'.format(run_index + 1)), index=False)


Defining variables:

In [17]:
template = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/text_english_run*.txt' # path to text input
language = 'english'

In [5]:
name_template = 'weights_{}_embedding-size-{}_nhid-{}_nlayers-{}_dropout-{}_memory-size-{}_wiki-kristina_english.pt'
name_template = 'LSTM_embedding-size_600_nhid_300_nlayers_1_dropout_02_gutenberg_9_english.pt'

name_templates =[
#"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_Gutenberg_big_+_wiki_+_lpp_2_english.pt",
#"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_Gutenberg_big_+_wiki_2_english.pt",
#"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_2_english.pt",
#"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_2_englishless_epochs.pt",
#"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_Gutenberg_5_englishless_epochs.pt",
#"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_Gutenberg_2_englishless_epochs.pt",
#"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_Gutenberg_1_englishless_epochs.pt",
#"LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_gutenberg_big_3_english.pt",
#"LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_gutenberg_big_2_english.pt",
#"LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_gutenberg_big_1_english.pt",
    
"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_wikipedia_english.pt",
"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_3_english.pt",
"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_2_english.pt",
"LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_1_english.pt",
"LSTM_embedding-size_768_nhid_300_nlayers_1_dropout_02_gutenberg_big_+_wiki_2_english.pt",

]

In [6]:
rnn_types = ['LSTM'] * len(name_templates)
ninps = ['768'] * len(name_templates)
nhids =  ['768', '768', '768', '768', '300'] * len(name_templates)
nlayers = [
    #'2', '2', '2',
    '2', '2', '2', '2', 
    '1'
]
dropouts =  ['02'] * len(name_templates)
memory_sizes = [np.inf] * len(name_templates) #, np.inf
vocab_paths = [
    #'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_+_wiki_+_lpp_2',
    #'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_+_wiki_2',
    #'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_2',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/wikipedia',    
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_3',    
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_2',    
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_1',    
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_+_wiki_2',    
]
config_path_folder = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/'
trained_model_folder = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/'
path_to_data = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/'

In [7]:
#template = '/Users/alexpsq/Code/Parietal/data/text_english_run*.txt' # path to text input
#config_path_folder = '/Users/alexpsq/Code/Parietal/data/configs/'
#trained_model_folder = '/Users/alexpsq/Code/Parietal'
#path_to_data = '/Users/alexpsq/Code/data/stimuli-representations'
#vocab_path = '/Users/alexpsq/Code/data/'

In [8]:
infos = [
    #['LSTM', 'embedding-size', '768', 'nhid', '768', 'nlayers', '2', 'dropout', '02', 'gutenberg_big_+_wiki_+_lpp', '2', 'english.pt'],
    #['LSTM', 'embedding-size', '768', 'nhid', '768', 'nlayers', '2', 'dropout', '02', 'gutenberg_big_+_wiki', '2', 'english.pt'],
    #['LSTM', 'embedding-size', '768', 'nhid', '768', 'nlayers', '2', 'dropout', '02', 'gutenberg_big', '2', 'english.pt'],
    ['LSTM', 'embedding-size', '768', 'nhid', '768', 'nlayers', '2', 'dropout', '02', 'wikipedia', 'english.pt'],
    ['LSTM', 'embedding-size', '768', 'nhid', '768', 'nlayers', '2', 'dropout', '02', 'Gutenberg_big', '3', 'english.pt'],
    ['LSTM', 'embedding-size', '768', 'nhid', '768', 'nlayers', '2', 'dropout', '02', 'Gutenberg_big', '2', 'english.pt'],
    ['LSTM', 'embedding-size', '768', 'nhid', '768', 'nlayers', '2', 'dropout', '02', 'Gutenberg_big', '1', 'english.pt'],
    ['LSTM', 'embedding-size', '768', 'nhid', '300', 'nlayers', '1', 'dropout', '02', 'Gutenberg_big_+_wiki', '2', 'english.pt'],
]
names = [
    #'embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_+_wiki_+_lpp_2_english',
    #'embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_+_wiki_2_english',
    #'embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_2_english',
    'embedding-size_768_nhid_768_nlayers_2_dropout_02_wikipedia_english',
    'embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_3_english',
    'embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_2_english',
    'embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_1_english',
    'embedding-size_768_nhid_300_nlayers_1_dropout_02_gutenberg_big_+_wiki_2_english',
]
config_paths = [
    #'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/config_LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_memory-size-inf_gutenberg_big_+_wiki_+_lpp_2_english.yml',
    #'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/config_LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_memory-size-inf_gutenberg_big_+_wiki_2_english.yml',
    #'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/config_LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_memory-size-inf_gutenberg_big_2_english.yml',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/config_LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_memory-size-inf_wikipedia_english.yml',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/config_LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_memory-size-inf_gutenberg_big_3_english.yml',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/config_LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_memory-size-inf_gutenberg_big_2_english.yml',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/config_LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_memory-size-inf_gutenberg_big_1_english.yml',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/config_LSTM_embedding-size-768_nhid-300_nlayers-1_dropout-02_memory-size-inf_gutenberg_big_+_wiki_2_english.yml',
]
saving_path_folders = [
    #'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_gutenberg_big_+_wiki_+_lpp_2',
    #'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_gutenberg_big_+_wiki_2',
    #'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_gutenberg_big_2',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_wikipedia',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_gutenberg_big_3',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_gutenberg_big_2',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/LSTM_embedding-size-768_nhid-768_nlayers-2_dropout-02_gutenberg_big_1',
    '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/LSTM_embedding-size-768_nhid-300_nlayers-1_dropout-02_gutenberg_big_+_wiki_2',
]
prediction_types = [
    #'sequential',
    #'sequential',
    #'sequential',
    'sequential',
    'sequential',
    'sequential',
    'sequential',
    'sequential',
]
pretrained_lstm_models = [
    trained_model_folder + name_templates[index].format(rnn_type, ninp, nhid, nlayer, dropout, memory_size) for index, (rnn_type, ninp, nhid, nlayer, dropout, memory_size) in enumerate(zip(rnn_types, ninps, nhids, nlayers, dropouts, memory_sizes))] # path to the model from which we want to retrieve the activations


In [9]:
#pretrained_lstm_models = [
#    trained_model_folder + name_templates[index].format(rnn_type, ninp, nhid, nlayer, dropout, memory_size) for index, (rnn_type, ninp, nhid, nlayer, dropout, memory_size) in enumerate(zip(rnn_types, ninps, nhids, nlayers, dropouts, memory_sizes))] # path to the model from which we want to retrieve the activations
#infos = [os.path.basename(model).split('_') for model in pretrained_lstm_models]
#names = ['_'.join(os.path.basename(model).split('.')[0].split('_')[1:]) for model in pretrained_lstm_models]
#config_paths = [os.path.join(config_path_folder, 'config_' + name + '.yml') for name in names]
#config_paths = ['_'.join(config.split('_')[:-3]) + '_' + '_'.join(config.split('_')[-2:]) for config in config_paths]
#saving_path_folders = [
#    os.path.join(path_to_data, '{}/{}_Gutenberg_1'.format(language, name)) for name in names]
#prediction_types = ['sequential' for i in pretrained_lstm_models]

In [10]:
for path in pretrained_lstm_models:
    print(path)
    print(os.path.exists(path))

/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_wikipedia_english.pt
True
/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_3_english.pt
True
/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_2_english.pt
True
/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_gutenberg_big_1_english.pt
True
/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_embedding-size_768_nhid_300_nlayers_1_dropout_02_gutenberg_big_+_wiki_2_english.pt
True


In [7]:
def get_config(rnn_type='LSTM',
               language='english',
               ntoken=50001,
               ninp=650,
               nhid=650,
               nlayers=2,
               dropout='02',
               memory_size=np.inf,
               tie_weights=False,
               eos_separator='<eos>',
               cuda=True,
               weights_path=None,
               path_to_vocab=None,
               includ_surprisal=True,
               includ_entropy=True,
               parameters=['in', 'forget', 'out', 'c_tilde', 'hidden', 'cell']):
    config_template = {
        'rnn_type': rnn_type,
        'language': language,
        'ntoken': ntoken,
        'ninp': ninp,
        'nhid': nhid,
        'nlayers': nlayers,
        'dropout': int(dropout)/10,
        'memory_size': memory_size,
        'tie_weights': tie_weights,
        'eos_separator': eos_separator,
        'cuda': cuda,
        'weights_path': os.path.join(weights_path, name_template.format(rnn_type, ninp, nhid, nlayers, dropout, 0)).replace('_memory-size-0', ''),
        'path_to_vocab': path_to_vocab,
        'includ_surprisal': includ_surprisal,
        'includ_entropy': includ_entropy,
        'parameters': parameters}
    return config_template


In [33]:
for index, (rnn_type, ninp, nhid, nlayer, dropout, memory_size) in enumerate(zip(rnn_types, ninps, nhids, nlayers, dropouts, memory_sizes)):
    config_template =  get_config(rnn_type=rnn_type,
                                   language='english',
                                   ntoken=50001,
                                   ninp=int(ninp),
                                   nhid=int(nhid),
                                   nlayers=int(nlayer),
                                   dropout=dropout,
                                   memory_size=memory_size,
                                   weights_path=trained_model_folder,
                                   path_to_vocab=vocab_path,
                                 parameters=['hidden'])
    save_yaml(config_template, config_paths[index])

In [34]:
config_template

{'rnn_type': 'LSTM',
 'language': 'english',
 'ntoken': 50001,
 'ninp': 650,
 'nhid': 650,
 'nlayers': 2,
 'dropout': 0.2,
 'memory_size': 100,
 'tie_weights': False,
 'eos_separator': '<eos>',
 'cuda': True,
 'weights_path': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/weights_LSTM_embedding-size-650_nhid-650_nlayers-2_dropout-02_wiki-kristina_english.pt',
 'path_to_vocab': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training',
 'includ_surprisal': True,
 'includ_entropy': True,
 'parameters': ['hidden']}

Creating iterator for each run:

In [119]:
acc = 0
for i in iterator_list:
    for j in i:
        if 'unk' in j:
            acc+=1
            print(j)

<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk

<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk

<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk>
<unk

In [120]:
acc

4035

In [15]:
os.path.basename(paths[0])[:-3]

'LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_Gutenberg_big_+_wiki_2_english_for_RSA_checkpoint_epoch-1_batch-0.'

## Activation extraction

In [27]:
paths = [
#"/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_L-4_H-768_final/LSTM_embedding-size_768_nhid_768_nlayers_4_dropout_02_final_version_englishfinal_version.pt",
#"/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_L-2_H-768_final/LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_final_version_englishfinal_version.pt",
#"/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_L-1_H-768_final/LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_final_version_englishfinal_version.pt",
#"/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_L-1_H-768_final/LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_final_version_englishfinal_version_checkpoint_epoch-10_batch-0.pt",



#"/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_L-2_H-768_xlarge/LSTM_embedding-size_768_nhid_768_nlayers_2_dropout_02_Gutenberg_xlarge_english.pt",
#"/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_L-1_H-768_wikipedia/LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_wikipedia_english.pt",
#"/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_L-2_H-650_wikipedia/hidden650_batch128_dropout0.2_lr20.0.pt",
"/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/derivatives/fMRI/models/english/LSTM/LSTM_L-1_H-768_xlarge/LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02_Gutenberg_xlarge_english.pt",



]

config_paths = [
    #{'cuda': False,'dropout': 0.2,'eos_separator': '<eos>','includ_entropy': True,'includ_surprisal': True,'language': 'english','memory_size': np.inf,'nhid': 768,'ninp': 768,'nlayers': 2,'ntoken': 50001,'parameters': ['hidden'],'path_to_vocab': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_xlarge','rnn_type': 'LSTM','tie_weights': False,'weights_path': paths[0]},
    #{'cuda': False,'dropout': 0.2,'eos_separator': '<eos>','includ_entropy': True,'includ_surprisal': True,'language': 'english','memory_size': np.inf,'nhid': 768,'ninp': 768,'nlayers': 1,'ntoken': 50001,'parameters': ['hidden'],'path_to_vocab': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/wikipedia','rnn_type': 'LSTM','tie_weights': False,'weights_path': paths[1]},
    #{'cuda': False,'dropout': 0.2,'eos_separator': '<eos>','includ_entropy': True,'includ_surprisal': True,'language': 'english','memory_size': np.inf,'nhid': 650,'ninp': 650,'nlayers': 2,'ntoken': 50001,'parameters': ['hidden'],'path_to_vocab': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/LSTM','rnn_type': 'LSTM','tie_weights': False,'weights_path': paths[5]},
    {'cuda': False,'dropout': 0.2,'eos_separator': '<eos>','includ_entropy': True,'includ_surprisal': True,'language': 'english','memory_size': np.inf,'nhid': 768,'ninp': 768,'nlayers': 1,'ntoken': 50001,'parameters': ['hidden'],'path_to_vocab': '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_xlarge','rnn_type': 'LSTM','tie_weights': False,'weights_path': paths[0]},
    ]


rnn_types = ['LSTM'] * len(paths)
ninps = ['768'] * len(paths)
nhids =  ['768'] * len(paths)
nlayers = ['1'] * 3
dropouts =  ['02'] * len(paths)
memory_sizes = [np.inf] * len(paths) #, np.inf
vocab_paths = [config['path_to_vocab'] for config in config_paths]
config_path_folder = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/models/LSTM/configs/'
path_to_data = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/'
#names = [os.path.basename(path)[:-3].replace('dropout_02_final_version_englishfinal_version_checkpoint', 'trained') for path in paths]
names = [
#"LSTM_L-2_H-768_xlarge_trained",
#"LSTM_embedding-size-768_nhid-768_nlayers-1_dropout-02_wikipedia",
#"LSTM_L-2_H-650_wikipedia_trained",
"LSTM_L-1_H-768_xlarge_trained",
]
saving_path_folders = [f'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/{name}' for name in names]
#names = ["LSTM_embedding-size_768_nhid_768_nlayers_4_seed-1111", "LSTM_embedding-size_768_nhid_768_nlayers_2_seed-1111", "LSTM_embedding-size_768_nhid_768_nlayers_1_seed-1111"]
#saving_path_folders = [f'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/stimuli-representations/english/{name}' for name in names]
prediction_types = ['sequential'] * len(paths)


In [28]:
for index, config in enumerate(config_paths):
    # tokenizing LPP text
    paths = sorted(glob.glob(template))
    vocab_path = vocab_paths[index]
    vocab = Dictionary(vocab_path, language)
    iterator_list = [tokenize(path, language, train=False, vocab=vocab) for path in paths]

    # instanciating LSTM extractor
    extractor = LSTMExtractor(config, 
                              language, 
                              names[index], 
                              prediction_types[index], 
                              output_hidden_states=True, 
                              memory_size=memory_sizes[index], 
                              randomize=False,
                              seed=1111
                             )

    # Extracting activations
    print(extractor.name, ' - Extracting activations ...')
    for run_index, iterator in tqdm(enumerate(iterator_list)):
        print("############# Run {} #############".format(run_index))
        activations  = extractor.extract_activations(iterator, language)

        transform(
            activations, 
            saving_path_folders[index], 
            'activations', 
            run_index=run_index,
            n_layers_hidden=int(nlayers[index]),
            hidden_size=int(nhids[index]))  
        #activations.to_csv(os.path.join(saving_path_folders[index], 'activations_run{}.csv'.format(run_index + 1)), index=False)




100%|██████████| 135/135 [00:00<00:00, 467959.54it/s]
100%|██████████| 135/135 [00:01<00:00, 129.79it/s]
100%|██████████| 135/135 [00:00<00:00, 430266.75it/s]
100%|██████████| 135/135 [00:01<00:00, 119.29it/s]
100%|██████████| 176/176 [00:00<00:00, 705733.75it/s]
100%|██████████| 176/176 [00:01<00:00, 139.72it/s]
100%|██████████| 173/173 [00:00<00:00, 448186.90it/s]
100%|██████████| 173/173 [00:01<00:00, 148.02it/s]
100%|██████████| 177/177 [00:00<00:00, 463126.52it/s]
100%|██████████| 177/177 [00:01<00:00, 157.06it/s]
100%|██████████| 216/216 [00:00<00:00, 829642.55it/s]
100%|██████████| 216/216 [00:01<00:00, 165.35it/s]
100%|██████████| 196/196 [00:00<00:00, 660308.10it/s]
100%|██████████| 196/196 [00:01<00:00, 149.53it/s]
100%|██████████| 145/145 [00:00<00:00, 232216.14it/s]
100%|██████████| 145/145 [00:01<00:00, 130.81it/s]
100%|██████████| 207/207 [00:00<00:00, 697366.21it/s]
100%|██████████| 207/207 [00:01<00:00, 150.88it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s][A
7it [00:00, 62

LSTM_embedding-size_768_nhid_768_nlayers_1_dropout_02  - Extracting activations ...
############# Run 0 #############



15it [00:00, 65.28it/s][A
23it [00:00, 67.02it/s][A
31it [00:00, 68.30it/s][A
39it [00:00, 69.22it/s][A
47it [00:00, 69.99it/s][A
55it [00:00, 70.63it/s][A
63it [00:00, 71.06it/s][A
71it [00:01, 71.39it/s][A
79it [00:01, 71.30it/s][A
86it [00:01, 70.49it/s][A
94it [00:01, 70.61it/s][A
102it [00:01, 70.88it/s][A
110it [00:01, 71.44it/s][A
118it [00:01, 71.86it/s][A
126it [00:01, 71.89it/s][A
134it [00:01, 72.19it/s][A
142it [00:01, 72.44it/s][A
150it [00:02, 72.61it/s][A
158it [00:02, 72.76it/s][A
166it [00:02, 72.82it/s][A
174it [00:02, 72.88it/s][A
182it [00:02, 72.85it/s][A
190it [00:02, 72.45it/s][A
198it [00:02, 72.60it/s][A
206it [00:02, 72.69it/s][A
214it [00:02, 72.80it/s][A
222it [00:03, 72.85it/s][A
230it [00:03, 72.83it/s][A
238it [00:03, 72.01it/s][A
246it [00:03, 71.32it/s][A
254it [00:03, 71.55it/s][A
262it [00:03, 71.77it/s][A
270it [00:03, 71.80it/s][A
278it [00:03, 71.94it/s][A
286it [00:03, 72.03it/s][A
294it [00:04, 72.15it/s][A
30

(1894, 768)
(1894, 770)


1it [00:30, 30.97s/it]
0it [00:00, ?it/s][A
3it [00:00, 29.50it/s][A

############# Run 1 #############



10it [00:00, 35.33it/s][A
17it [00:00, 41.40it/s][A
25it [00:00, 47.37it/s][A
33it [00:00, 52.67it/s][A
41it [00:00, 57.15it/s][A
49it [00:00, 60.80it/s][A
57it [00:00, 63.59it/s][A
65it [00:00, 65.74it/s][A
73it [00:01, 67.40it/s][A
81it [00:01, 68.66it/s][A
89it [00:01, 69.47it/s][A
97it [00:01, 70.12it/s][A
105it [00:01, 70.61it/s][A
113it [00:01, 70.92it/s][A
121it [00:01, 71.16it/s][A
129it [00:01, 70.77it/s][A
137it [00:01, 70.99it/s][A
145it [00:02, 71.14it/s][A
153it [00:02, 71.33it/s][A
161it [00:02, 70.61it/s][A
169it [00:02, 70.24it/s][A
177it [00:02, 70.58it/s][A
185it [00:02, 70.87it/s][A
193it [00:02, 71.05it/s][A
201it [00:02, 71.16it/s][A
209it [00:03, 71.20it/s][A
217it [00:03, 71.30it/s][A
225it [00:03, 71.42it/s][A
233it [00:03, 71.49it/s][A
241it [00:03, 71.23it/s][A
249it [00:03, 71.38it/s][A
257it [00:03, 71.45it/s][A
265it [00:03, 71.51it/s][A
273it [00:03, 71.57it/s][A
281it [00:04, 71.61it/s][A
289it [00:04, 71.56it/s][A
297

(2093, 768)
(2093, 770)


2it [01:04, 31.82s/it]
0it [00:00, ?it/s][A
2it [00:00, 19.21it/s][A

############# Run 2 #############



9it [00:00, 24.52it/s][A
16it [00:00, 30.36it/s][A
23it [00:00, 36.49it/s][A
31it [00:00, 42.63it/s][A
38it [00:00, 48.06it/s][A
45it [00:00, 52.68it/s][A
53it [00:00, 56.99it/s][A
61it [00:00, 60.67it/s][A
69it [00:01, 63.57it/s][A
77it [00:01, 65.75it/s][A
85it [00:01, 67.36it/s][A
93it [00:01, 68.62it/s][A
101it [00:01, 69.54it/s][A
109it [00:01, 70.22it/s][A
117it [00:01, 70.60it/s][A
125it [00:01, 70.95it/s][A
133it [00:01, 71.19it/s][A
141it [00:02, 71.28it/s][A
149it [00:02, 71.40it/s][A
157it [00:02, 71.34it/s][A
165it [00:02, 71.45it/s][A
173it [00:02, 71.46it/s][A
181it [00:02, 71.58it/s][A
189it [00:02, 70.85it/s][A
197it [00:02, 70.30it/s][A
205it [00:02, 70.54it/s][A
213it [00:03, 70.84it/s][A
221it [00:03, 71.06it/s][A
229it [00:03, 71.19it/s][A
237it [00:03, 71.34it/s][A
245it [00:03, 71.46it/s][A
253it [00:03, 71.53it/s][A
261it [00:03, 71.45it/s][A
269it [00:03, 71.42it/s][A
277it [00:03, 71.36it/s][A
285it [00:04, 71.44it/s][A
293i

2071it [00:29, 69.17it/s][A
2078it [00:30, 69.19it/s][A
2085it [00:30, 69.00it/s][A
2092it [00:30, 69.06it/s][A
2099it [00:30, 69.11it/s][A
2106it [00:30, 69.17it/s][A
2113it [00:30, 68.39it/s][A
2120it [00:30, 67.93it/s][A
2127it [00:30, 68.16it/s][A
2134it [00:30, 68.45it/s][A
2141it [00:30, 68.69it/s][A
2148it [00:31, 68.80it/s][A
2155it [00:31, 68.99it/s][A
2162it [00:31, 69.05it/s][A
2169it [00:31, 69.11it/s][A
2176it [00:31, 69.08it/s][A
2183it [00:31, 69.19it/s][A
2190it [00:31, 69.14it/s][A
2197it [00:31, 68.79it/s][A
2204it [00:31, 68.98it/s][A
2211it [00:31, 69.09it/s][A
2218it [00:32, 69.13it/s][A
2225it [00:32, 69.10it/s][A
2232it [00:32, 69.10it/s][A
2239it [00:32, 69.13it/s][A
2246it [00:32, 69.19it/s][A
2253it [00:32, 69.14it/s][A
2260it [00:32, 68.39it/s][A
2267it [00:32, 67.91it/s][A
2274it [00:32, 68.12it/s][A
2281it [00:33, 68.39it/s][A
2288it [00:33, 68.63it/s][A
2295it [00:33, 68.59it/s][A
2302it [00:33, 68.67it/s][A
2309it [00:33,

(2297, 768)
(2297, 770)


3it [01:43, 33.80s/it]
0it [00:00, ?it/s][A
2it [00:00, 19.42it/s][A

############# Run 3 #############



9it [00:00, 24.73it/s][A
17it [00:00, 30.70it/s][A
24it [00:00, 36.91it/s][A
31it [00:00, 42.97it/s][A
38it [00:00, 48.44it/s][A
45it [00:00, 52.74it/s][A
52it [00:00, 56.36it/s][A
59it [00:00, 59.62it/s][A
66it [00:01, 62.24it/s][A
73it [00:01, 64.15it/s][A
80it [00:01, 65.57it/s][A
87it [00:01, 66.30it/s][A
94it [00:01, 67.13it/s][A
101it [00:01, 67.91it/s][A
108it [00:01, 68.38it/s][A
115it [00:01, 68.38it/s][A
122it [00:01, 68.66it/s][A
129it [00:01, 68.92it/s][A
136it [00:02, 69.10it/s][A
143it [00:02, 69.18it/s][A
150it [00:02, 69.26it/s][A
157it [00:02, 69.34it/s][A
164it [00:02, 69.40it/s][A
171it [00:02, 69.47it/s][A
178it [00:02, 69.41it/s][A
185it [00:02, 69.34it/s][A
192it [00:02, 68.52it/s][A
199it [00:02, 68.02it/s][A
206it [00:03, 68.32it/s][A
213it [00:03, 68.64it/s][A
220it [00:03, 68.80it/s][A
227it [00:03, 68.62it/s][A
234it [00:03, 68.91it/s][A
241it [00:03, 69.08it/s][A
248it [00:03, 69.17it/s][A
255it [00:03, 69.21it/s][A
262it

2019it [00:29, 67.35it/s][A
2026it [00:29, 66.95it/s][A
2033it [00:29, 66.66it/s][A
2040it [00:29, 66.53it/s][A
2047it [00:30, 66.90it/s][A
2054it [00:30, 67.22it/s][A
2061it [00:30, 67.37it/s][A
2068it [00:30, 67.51it/s][A
2075it [00:30, 67.63it/s][A
2082it [00:30, 67.32it/s][A
2089it [00:30, 66.50it/s][A
2096it [00:30, 66.67it/s][A
2103it [00:30, 66.96it/s][A
2110it [00:31, 67.12it/s][A
2117it [00:31, 67.31it/s][A
2124it [00:31, 67.49it/s][A
2131it [00:31, 67.61it/s][A
2138it [00:31, 67.69it/s][A
2145it [00:31, 67.72it/s][A
2152it [00:31, 67.78it/s][A
2159it [00:31, 67.86it/s][A
2166it [00:31, 67.75it/s][A
2173it [00:31, 67.77it/s][A
2180it [00:32, 67.73it/s][A
2187it [00:32, 67.82it/s][A
2194it [00:32, 67.83it/s][A
2201it [00:32, 67.82it/s][A
2208it [00:32, 67.76it/s][A
2215it [00:32, 67.81it/s][A
2222it [00:32, 67.77it/s][A
2229it [00:32, 67.10it/s][A
2236it [00:32, 65.95it/s][A
2243it [00:33, 66.15it/s][A
2250it [00:33, 66.50it/s][A
2257it [00:33,

(2152, 768)
(2152, 770)


4it [02:19, 34.66s/it]
0it [00:00, ?it/s][A
3it [00:00, 26.31it/s][A

############# Run 4 #############



10it [00:00, 32.32it/s][A
17it [00:00, 38.54it/s][A
24it [00:00, 44.31it/s][A
31it [00:00, 49.49it/s][A
38it [00:00, 54.25it/s][A
46it [00:00, 58.29it/s][A
53it [00:00, 61.27it/s][A
60it [00:00, 63.12it/s][A
67it [00:01, 64.48it/s][A
74it [00:01, 65.39it/s][A
81it [00:01, 66.15it/s][A
88it [00:01, 66.43it/s][A
95it [00:01, 66.94it/s][A
102it [00:01, 67.26it/s][A
109it [00:01, 67.41it/s][A
116it [00:01, 67.63it/s][A
123it [00:01, 67.80it/s][A
130it [00:01, 67.84it/s][A
137it [00:02, 67.93it/s][A
144it [00:02, 67.92it/s][A
151it [00:02, 67.97it/s][A
158it [00:02, 67.92it/s][A
165it [00:02, 67.67it/s][A
172it [00:02, 66.81it/s][A
179it [00:02, 66.77it/s][A
186it [00:02, 66.96it/s][A
193it [00:02, 67.04it/s][A
200it [00:03, 67.36it/s][A
207it [00:03, 67.46it/s][A
214it [00:03, 67.67it/s][A
221it [00:03, 67.82it/s][A
228it [00:03, 67.89it/s][A
235it [00:03, 67.99it/s][A
242it [00:03, 68.06it/s][A
249it [00:03, 68.10it/s][A
256it [00:03, 68.12it/s][A
263i

2020it [00:29, 67.92it/s][A
2027it [00:30, 67.99it/s][A
2034it [00:30, 67.93it/s][A
2041it [00:30, 67.82it/s][A
2048it [00:30, 66.99it/s][A
2055it [00:30, 66.60it/s][A
2062it [00:30, 66.59it/s][A
2069it [00:30, 67.01it/s][A
2076it [00:30, 67.29it/s][A
2083it [00:30, 67.47it/s][A
2090it [00:30, 67.62it/s][A
2097it [00:31, 67.73it/s][A
2104it [00:31, 67.72it/s][A
2111it [00:31, 67.82it/s][A
2118it [00:31, 67.91it/s][A
2125it [00:31, 67.99it/s][A
2132it [00:31, 67.96it/s][A
2139it [00:31, 68.03it/s][A
2146it [00:31, 68.04it/s][A
2153it [00:31, 68.09it/s][A
2160it [00:32, 68.11it/s][A
2167it [00:32, 67.98it/s][A
2174it [00:32, 68.00it/s][A
2181it [00:32, 67.93it/s][A
2188it [00:32, 67.47it/s][A
2195it [00:32, 66.53it/s][A
2202it [00:32, 66.68it/s][A
2209it [00:32, 66.97it/s][A
2216it [00:32, 67.19it/s][A
2223it [00:32, 67.24it/s][A
2230it [00:33, 67.28it/s][A
2242it [00:33, 67.43it/s][A


(2065, 768)
(2065, 770)


5it [02:55, 34.94s/it]
0it [00:00, ?it/s][A
2it [00:00, 19.88it/s][A

############# Run 5 #############



9it [00:00, 25.30it/s][A
16it [00:00, 31.28it/s][A
23it [00:00, 37.41it/s][A
30it [00:00, 43.14it/s][A
37it [00:00, 48.30it/s][A
44it [00:00, 52.76it/s][A
51it [00:00, 56.39it/s][A
58it [00:00, 59.28it/s][A
65it [00:01, 61.53it/s][A
72it [00:01, 62.98it/s][A
79it [00:01, 63.57it/s][A
86it [00:01, 64.52it/s][A
93it [00:01, 65.43it/s][A
100it [00:01, 66.15it/s][A
107it [00:01, 66.10it/s][A
114it [00:01, 66.68it/s][A
121it [00:01, 67.10it/s][A
128it [00:01, 67.39it/s][A
135it [00:02, 67.57it/s][A
142it [00:02, 67.74it/s][A
149it [00:02, 67.89it/s][A
156it [00:02, 67.93it/s][A
163it [00:02, 68.00it/s][A
170it [00:02, 68.03it/s][A
177it [00:02, 68.06it/s][A
184it [00:02, 68.01it/s][A
191it [00:02, 68.00it/s][A
198it [00:02, 67.97it/s][A
205it [00:03, 68.02it/s][A
212it [00:03, 68.09it/s][A
219it [00:03, 62.32it/s][A
226it [00:03, 63.27it/s][A
233it [00:03, 64.55it/s][A
240it [00:03, 65.52it/s][A
247it [00:03, 66.27it/s][A
254it [00:03, 66.82it/s][A
261it

2118it [00:30, 70.45it/s][A
2126it [00:31, 70.37it/s][A
2134it [00:31, 69.44it/s][A
2141it [00:31, 69.31it/s][A
2149it [00:31, 69.70it/s][A
2157it [00:31, 70.04it/s][A
2165it [00:31, 70.25it/s][A
2173it [00:31, 70.44it/s][A
2181it [00:31, 70.59it/s][A
2189it [00:31, 70.74it/s][A
2197it [00:32, 70.81it/s][A
2205it [00:32, 70.89it/s][A
2213it [00:32, 70.97it/s][A
2221it [00:32, 70.96it/s][A
2229it [00:32, 71.02it/s][A
2237it [00:32, 70.96it/s][A
2245it [00:32, 70.73it/s][A
2253it [00:32, 70.83it/s][A
2261it [00:32, 70.92it/s][A
2269it [00:33, 71.06it/s][A
2277it [00:33, 70.92it/s][A
2285it [00:33, 70.01it/s][A
2293it [00:33, 69.89it/s][A
2301it [00:33, 70.15it/s][A
2309it [00:33, 70.34it/s][A
2317it [00:33, 70.61it/s][A
2325it [00:33, 70.75it/s][A
2333it [00:34, 70.81it/s][A
2341it [00:34, 70.97it/s][A
2349it [00:34, 71.07it/s][A
2357it [00:34, 70.86it/s][A
2365it [00:34, 71.03it/s][A
2373it [00:34, 71.15it/s][A
2381it [00:34, 71.17it/s][A
2389it [00:34,

(2404, 768)
(2404, 770)


6it [03:36, 36.70s/it]
0it [00:00, ?it/s][A
1it [00:00,  9.87it/s][A

############# Run 6 #############



8it [00:00, 13.29it/s][A
16it [00:00, 17.56it/s][A
24it [00:00, 22.68it/s][A
32it [00:00, 28.46it/s][A
39it [00:00, 34.60it/s][A
46it [00:00, 40.74it/s][A
53it [00:00, 46.50it/s][A
60it [00:00, 51.70it/s][A
67it [00:01, 55.72it/s][A
74it [00:01, 58.91it/s][A
82it [00:01, 62.02it/s][A
90it [00:01, 64.45it/s][A
98it [00:01, 66.34it/s][A
106it [00:01, 67.76it/s][A
114it [00:01, 68.36it/s][A
122it [00:01, 68.59it/s][A
129it [00:01, 68.75it/s][A
136it [00:02, 68.99it/s][A
144it [00:02, 69.55it/s][A
152it [00:02, 70.05it/s][A
160it [00:02, 70.41it/s][A
168it [00:02, 70.72it/s][A
176it [00:02, 70.86it/s][A
184it [00:02, 70.96it/s][A
192it [00:02, 71.03it/s][A
200it [00:02, 71.09it/s][A
208it [00:03, 71.16it/s][A
216it [00:03, 70.75it/s][A
224it [00:03, 69.91it/s][A
232it [00:03, 70.08it/s][A
240it [00:03, 70.35it/s][A
248it [00:03, 70.24it/s][A
256it [00:03, 70.52it/s][A
264it [00:03, 70.71it/s][A
272it [00:03, 70.85it/s][A
280it [00:04, 70.95it/s][A
288it

2273it [00:32, 71.08it/s][A
2281it [00:32, 71.13it/s][A
2289it [00:32, 71.03it/s][A
2297it [00:32, 71.01it/s][A
2305it [00:32, 70.97it/s][A
2313it [00:32, 70.98it/s][A
2321it [00:33, 70.89it/s][A
2329it [00:33, 70.01it/s][A
2337it [00:33, 69.70it/s][A
2345it [00:33, 69.94it/s][A
2353it [00:33, 70.25it/s][A
2361it [00:33, 70.43it/s][A
2369it [00:33, 70.53it/s][A
2377it [00:33, 70.52it/s][A
2385it [00:33, 70.72it/s][A
2393it [00:34, 70.86it/s][A
2401it [00:34, 70.96it/s][A
2409it [00:34, 71.03it/s][A
2417it [00:34, 71.04it/s][A
2425it [00:34, 71.12it/s][A
2433it [00:34, 70.72it/s][A
2441it [00:34, 70.65it/s][A
2449it [00:34, 70.57it/s][A
2457it [00:34, 70.65it/s][A
2465it [00:35, 70.87it/s][A
2473it [00:35, 70.89it/s][A
2481it [00:35, 70.01it/s][A
2489it [00:35, 69.83it/s][A
2497it [00:35, 70.19it/s][A
2505it [00:35, 70.39it/s][A
2513it [00:35, 69.92it/s][A
2520it [00:35, 69.62it/s][A
2528it [00:36, 69.90it/s][A
2536it [00:36, 70.28it/s][A
2544it [00:36,

(2435, 768)
(2435, 770)


7it [04:16, 37.76s/it]
0it [00:00, ?it/s][A
2it [00:00, 18.01it/s][A

############# Run 7 #############



9it [00:00, 23.16it/s][A
17it [00:00, 29.01it/s][A
25it [00:00, 35.25it/s][A
33it [00:00, 41.45it/s][A
40it [00:00, 47.17it/s][A
47it [00:00, 52.27it/s][A
54it [00:00, 56.29it/s][A
61it [00:00, 59.80it/s][A
69it [00:01, 62.65it/s][A
76it [00:01, 64.61it/s][A
84it [00:01, 66.39it/s][A
92it [00:01, 67.64it/s][A
99it [00:01, 67.68it/s][A
106it [00:01, 67.93it/s][A
114it [00:01, 68.75it/s][A
122it [00:01, 69.39it/s][A
130it [00:01, 69.93it/s][A
138it [00:02, 70.30it/s][A
146it [00:02, 70.56it/s][A
154it [00:02, 70.76it/s][A
162it [00:02, 70.91it/s][A
170it [00:02, 70.96it/s][A
178it [00:02, 71.01it/s][A
186it [00:02, 70.95it/s][A
194it [00:02, 70.72it/s][A
202it [00:02, 70.77it/s][A
210it [00:03, 70.92it/s][A
218it [00:03, 71.00it/s][A
226it [00:03, 71.05it/s][A
234it [00:03, 71.14it/s][A
242it [00:03, 71.07it/s][A
250it [00:03, 70.06it/s][A
258it [00:03, 69.71it/s][A
266it [00:03, 70.00it/s][A
274it [00:03, 70.35it/s][A
282it [00:04, 70.58it/s][A
290it

2150it [00:30, 68.80it/s][A
2157it [00:30, 68.94it/s][A
2164it [00:31, 69.09it/s][A
2171it [00:31, 69.25it/s][A
2183it [00:31, 69.63it/s][A


(2038, 768)
(2038, 770)


8it [04:50, 36.54s/it]
0it [00:00, ?it/s][A
3it [00:00, 27.86it/s][A

############# Run 8 #############



11it [00:00, 34.03it/s][A
19it [00:00, 40.27it/s][A
26it [00:00, 45.87it/s][A
33it [00:00, 50.93it/s][A
40it [00:00, 55.16it/s][A
47it [00:00, 58.68it/s][A
54it [00:00, 61.45it/s][A
61it [00:00, 63.56it/s][A
68it [00:01, 65.11it/s][A
75it [00:01, 66.22it/s][A
82it [00:01, 66.96it/s][A
89it [00:01, 67.77it/s][A
96it [00:01, 68.38it/s][A
103it [00:01, 68.77it/s][A
110it [00:01, 69.00it/s][A
117it [00:01, 69.15it/s][A
124it [00:01, 69.29it/s][A
131it [00:01, 69.31it/s][A
138it [00:02, 68.20it/s][A
145it [00:02, 67.96it/s][A
152it [00:02, 68.39it/s][A
159it [00:02, 68.73it/s][A
166it [00:02, 68.99it/s][A
173it [00:02, 69.25it/s][A
180it [00:02, 69.43it/s][A
187it [00:02, 69.50it/s][A
194it [00:02, 69.51it/s][A
201it [00:02, 69.48it/s][A
208it [00:03, 69.51it/s][A
215it [00:03, 69.53it/s][A
222it [00:03, 69.62it/s][A
229it [00:03, 69.73it/s][A
236it [00:03, 69.76it/s][A
243it [00:03, 69.81it/s][A
250it [00:03, 69.58it/s][A
257it [00:03, 69.62it/s][A
264i

2021it [00:29, 69.59it/s][A
2028it [00:29, 69.65it/s][A
2035it [00:29, 69.64it/s][A
2042it [00:29, 69.73it/s][A
2049it [00:29, 69.35it/s][A
2056it [00:29, 68.95it/s][A
2063it [00:29, 68.15it/s][A
2070it [00:29, 67.56it/s][A
2077it [00:30, 68.06it/s][A
2084it [00:30, 68.58it/s][A
2091it [00:30, 68.93it/s][A
2098it [00:30, 69.10it/s][A
2105it [00:30, 69.22it/s][A
2112it [00:30, 69.36it/s][A
2119it [00:30, 69.39it/s][A
2126it [00:30, 69.34it/s][A
2133it [00:30, 69.30it/s][A
2140it [00:30, 69.27it/s][A
2147it [00:31, 69.25it/s][A
2154it [00:31, 69.31it/s][A
2161it [00:31, 69.05it/s][A
2168it [00:31, 69.13it/s][A
2175it [00:31, 69.23it/s][A
2182it [00:31, 69.11it/s][A
2189it [00:31, 69.17it/s][A
2196it [00:31, 69.29it/s][A
2203it [00:31, 69.38it/s][A
2210it [00:32, 68.96it/s][A
2217it [00:32, 67.97it/s][A
2224it [00:32, 68.13it/s][A
2231it [00:32, 68.43it/s][A
2238it [00:32, 68.63it/s][A
2245it [00:32, 68.84it/s][A
2252it [00:32, 69.03it/s][A
2259it [00:32,

(2530, 768)
(2530, 770)


9it [05:32, 36.97s/it]


## Creating datasets for training

In [18]:
import lstm_utils as utils
import random
from joblib import Parallel, delayed

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]
        
def clean(sent):
    if len(sent.replace('<unk>', '')) > 7:
        if (sent.count('<unk>') / len(sent.split(' '))) <0.10:
            return sent
        
def f(text):
    return ' '.join(tokenize(text, 'english', True, vocab=None))

def create_set(block, saving_path, relative_size=30, seed=1111, name='test'):
    """Create test set from block with given proportion (relative_size).
    """
    random.seed(seed)
    random.shuffle(block)
    N = len(block)
    assert ((relative_size<=100) and (relative_size>0))
    test_set = block[:int(N*relative_size/100)]
    preprocessed_texts = Parallel(n_jobs=-1)(delayed(f)(text) for text in tqdm(test_set))
    test_text = ' '.join(preprocessed_texts)
    
    tmp = test_text.split('<eos>\n')
    test_text = '<eos>\n'.join(list(filter(None, Parallel(n_jobs=-1)(delayed(clean)(sent) for sent in tqdm(tmp)))))

    utils.write(os.path.join(saving_path, f'{name}.txt'), test_text)



In [24]:

seed = 1111
files = sorted(glob.glob('/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/Gutenberg/books/*_corrected.txt'))

random.seed(seed)
random.shuffle(files)

blocks = list(chunks(files, 3813*3)) # 3813*3 books for each LSTM --> 3 datasets are created
# [['B', 'H', 'G'], ['D', 'A', 'C'], ['E', 'F', 'I'], ['J', 'K']]

In [35]:
l1 = 31*41
print(l1)
l2 = 31*41*3
print(l2)
l3 = 31*41*3*3
print(l3)
l4 = 31*41*2*3*3
print(l4)

1271
3813
11439
22878


In [38]:

joined_block = blocks[0]+blocks[1]
names = ["Gutenberg_small", "Gutenberg_medium", "Gutenberg_large", "Gutenberg_xlarge"]

for index, l in enumerate([l1, l2, l3, l4]):
    block = joined_block[:l]
    preprocessed_texts = Parallel(n_jobs=-1)(delayed(f)(text) for text in tqdm(block))
    block_text = ' '.join(preprocessed_texts)

    tmp = block_text.split('<eos>\n')
    block_text = '<eos>\n'.join(list(filter(None, Parallel(n_jobs=-1)(delayed(clean)(sent) for sent in tqdm(tmp)))))


    utils.write(f'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/{names[index]}/train.txt', block_text)
    #block_text += wikipedia_train
    #utils.write(f'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_+_wiki_{index+1}/train.txt', block_text)
    #block_text += lpp
    #utils.write(f'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_+_wiki_+_lpp_{index+1}/train.txt', block_text)


100%|██████████| 1271/1271 [00:01<00:00, 1173.18it/s]
100%|██████████| 2542542/2542542 [00:15<00:00, 166572.68it/s]
100%|██████████| 3813/3813 [00:07<00:00, 483.66it/s]
100%|██████████| 7942080/7942080 [01:03<00:00, 125793.63it/s]
100%|██████████| 11439/11439 [00:20<00:00, 559.90it/s]
100%|██████████| 23895725/23895725 [07:07<00:00, 55844.68it/s] 
100%|██████████| 22878/22878 [00:38<00:00, 596.32it/s]
100%|██████████| 47395788/47395788 [26:58<00:00, 29290.43it/s]


In [6]:
wikipedia_train = '/Volumes/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/wikipedia/train.txt'
wikipedia_train = open(wikipedia_train, 'r').read()

paths = sorted(glob.glob(template))
iterator_list = [tokenize(path, language, train=False, vocab=None) for path in paths]
lpp = ' '.join([' '.join(iterator_list[i]).replace('<eos>', '<eos>\n') for i in range(9)])

100%|██████████| 135/135 [00:00<00:00, 456269.98it/s]
100%|██████████| 135/135 [00:00<00:00, 146388.58it/s]
100%|██████████| 135/135 [00:00<00:00, 527708.33it/s]
100%|██████████| 135/135 [00:00<00:00, 132606.80it/s]
100%|██████████| 176/176 [00:00<00:00, 772173.12it/s]
100%|██████████| 176/176 [00:00<00:00, 228827.50it/s]
100%|██████████| 173/173 [00:00<00:00, 935070.35it/s]
100%|██████████| 173/173 [00:00<00:00, 221291.43it/s]
100%|██████████| 177/177 [00:00<00:00, 834148.10it/s]
100%|██████████| 177/177 [00:00<00:00, 263633.45it/s]
100%|██████████| 216/216 [00:00<00:00, 877877.58it/s]
100%|██████████| 216/216 [00:00<00:00, 284717.05it/s]
100%|██████████| 196/196 [00:00<00:00, 975188.12it/s]
100%|██████████| 196/196 [00:00<00:00, 269978.19it/s]
100%|██████████| 145/145 [00:00<00:00, 740772.33it/s]
100%|██████████| 145/145 [00:00<00:00, 219716.07it/s]
100%|██████████| 207/207 [00:00<00:00, 508623.86it/s]
100%|██████████| 207/207 [00:00<00:00, 164404.64it/s]


In [7]:

for index, block in enumerate(blocks):
    preprocessed_texts = Parallel(n_jobs=-1)(delayed(f)(text) for text in tqdm(block))
    block_text = ' '.join(preprocessed_texts)
    
    tmp = block_text.split('<eos>\n')
    block_text = '<eos>\n'.join(list(filter(None, Parallel(n_jobs=-1)(delayed(clean)(sent) for sent in tqdm(tmp)))))

    #utils.write(f'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_{index+1}/train.txt', block_text)

    utils.write(f'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_{index+1}/train.txt', block_text)
    #block_text += wikipedia_train
    #utils.write(f'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_+_wiki_{index+1}/train.txt', block_text)
    #block_text += lpp
    #utils.write(f'/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_big_+_wiki_+_lpp_{index+1}/train.txt', block_text)


100%|██████████| 3813/3813 [00:35<00:00, 106.95it/s]
100%|██████████| 7813324/7813324 [03:09<00:00, 41331.68it/s]
100%|██████████| 3813/3813 [00:33<00:00, 114.88it/s]
100%|██████████| 7651185/7651185 [03:03<00:00, 41675.15it/s]
100%|██████████| 3813/3813 [00:33<00:00, 112.93it/s]
100%|██████████| 7765644/7765644 [03:02<00:00, 42543.10it/s]
100%|██████████| 3813/3813 [00:39<00:00, 97.15it/s] 
100%|██████████| 8168345/8168345 [03:28<00:00, 39223.58it/s]
100%|██████████| 3813/3813 [00:38<00:00, 100.01it/s]
100%|██████████| 7902582/7902582 [03:29<00:00, 37764.56it/s]
100%|██████████| 3813/3813 [00:35<00:00, 106.43it/s]
100%|██████████| 7725305/7725305 [03:26<00:00, 37455.81it/s]
100%|██████████| 3813/3813 [00:35<00:00, 107.70it/s]
100%|██████████| 7782268/7782268 [03:35<00:00, 36082.45it/s]
100%|██████████| 3813/3813 [00:41<00:00, 92.41it/s] 
100%|██████████| 8007087/8007087 [03:31<00:00, 37843.30it/s]
100%|██████████| 3813/3813 [00:42<00:00, 89.46it/s] 
100%|██████████| 8059516/8059516 [0

In [8]:
for index, _ in enumerate(blocks):
    saving_path = f'/Volumes/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training/Gutenberg_{index+1}'
    create_set(blocks[(index+1)%len(blocks)], saving_path, relative_size=10, seed=1111, name='test')
    create_set(blocks[(index+2)%len(blocks)], saving_path, relative_size=10, seed=1111, name='valid')
    

100%|██████████| 381/381 [00:03<00:00, 97.38it/s] 
100%|██████████| 809265/809265 [00:19<00:00, 42574.77it/s]
100%|██████████| 381/381 [00:01<00:00, 193.37it/s]
100%|██████████| 812736/812736 [00:19<00:00, 41543.22it/s]
100%|██████████| 381/381 [00:01<00:00, 200.87it/s]
100%|██████████| 759097/759097 [00:20<00:00, 36787.82it/s]
100%|██████████| 381/381 [00:02<00:00, 172.42it/s]
100%|██████████| 873157/873157 [00:23<00:00, 36496.89it/s]
100%|██████████| 381/381 [00:01<00:00, 223.97it/s]
100%|██████████| 808014/808014 [00:21<00:00, 38049.66it/s]
100%|██████████| 381/381 [00:01<00:00, 213.34it/s]
100%|██████████| 801758/801758 [00:20<00:00, 39122.33it/s]
100%|██████████| 381/381 [00:02<00:00, 174.79it/s]
100%|██████████| 818731/818731 [00:22<00:00, 36480.15it/s]
100%|██████████| 381/381 [00:02<00:00, 156.66it/s]
100%|██████████| 714980/714980 [00:19<00:00, 35822.08it/s]
100%|██████████| 381/381 [00:02<00:00, 181.61it/s]
100%|██████████| 786877/786877 [00:21<00:00, 37237.01it/s]
100%|█████

### Creating vocabulary files

In [39]:
folder = '/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/data/text/english/lstm_training'
language = 'english'
datasets = ["Gutenberg_small", "Gutenberg_medium", "Gutenberg_large", "Gutenberg_xlarge"]

In [40]:
for dataset in tqdm(datasets):
    path = os.path.join(folder, dataset)
    dico = Dictionary(path, language)
    assert len(dico)==len(dico.word2idx.keys())
    assert len(dico)==len(dico.idx2word)

  0%|          | 0/4 [00:00<?, ?it/s]
  0%|          | 0/51795393 [00:00<?, ?it/s][A
  0%|          | 186236/51795393 [00:00<00:27, 1862352.90it/s][A
  1%|          | 381236/51795393 [00:00<00:27, 1887807.64it/s][A
  1%|          | 587047/51795393 [00:00<00:26, 1935861.61it/s][A
  2%|▏         | 798756/51795393 [00:00<00:25, 1986884.26it/s][A
  2%|▏         | 1012249/51795393 [00:00<00:25, 2029094.08it/s][A
  2%|▏         | 1217669/51795393 [00:00<00:24, 2036560.32it/s][A
  3%|▎         | 1434107/51795393 [00:00<00:24, 2073290.17it/s][A
  3%|▎         | 1651242/51795393 [00:00<00:23, 2101763.04it/s][A
  4%|▎         | 1872837/51795393 [00:00<00:23, 2134763.20it/s][A
  4%|▍         | 2088799/51795393 [00:01<00:23, 2142159.04it/s][A
  4%|▍         | 2310781/51795393 [00:01<00:22, 2164876.78it/s][A
  5%|▍         | 2532063/51795393 [00:01<00:22, 2179037.73it/s][A
  5%|▌         | 2754903/51795393 [00:01<00:22, 2193614.32it/s][A
  6%|▌         | 2976972/51795393 [00:01<00:22,

  0%|          | 589996/159344695 [00:00<01:21, 1941675.65it/s][A
  1%|          | 804774/159344695 [00:00<01:19, 1999228.31it/s][A
  1%|          | 1019242/159344695 [00:00<01:17, 2040748.41it/s][A
  1%|          | 1229881/159344695 [00:00<01:16, 2060006.40it/s][A
  1%|          | 1447175/159344695 [00:00<01:15, 2092633.50it/s][A
  1%|          | 1667100/159344695 [00:00<01:14, 2123516.14it/s][A
  1%|          | 1890528/159344695 [00:00<01:13, 2155573.87it/s][A
  1%|▏         | 2109470/159344695 [00:01<01:12, 2165616.40it/s][A
  1%|▏         | 2329027/159344695 [00:01<01:12, 2174515.39it/s][A
  2%|▏         | 2551393/159344695 [00:01<01:11, 2189026.69it/s][A
  2%|▏         | 2776753/159344695 [00:01<01:10, 2208005.84it/s][A
  2%|▏         | 3001486/159344695 [00:01<01:10, 2219656.88it/s][A
  2%|▏         | 3222326/159344695 [00:01<01:10, 2214645.36it/s][A
  2%|▏         | 3446868/159344695 [00:01<01:10, 2223786.29it/s][A
  2%|▏         | 3674031/159344695 [00:01<01:09, 2

 34%|███▍      | 53838190/159344695 [00:24<00:47, 2222618.11it/s][A
 34%|███▍      | 54060483/159344695 [00:24<00:47, 2219149.26it/s][A
 34%|███▍      | 54287717/159344695 [00:24<00:47, 2234840.65it/s][A
 34%|███▍      | 54513564/159344695 [00:24<00:46, 2241874.83it/s][A
 34%|███▍      | 54737787/159344695 [00:24<00:46, 2241799.23it/s][A
 34%|███▍      | 54961992/159344695 [00:24<00:46, 2239663.00it/s][A
 35%|███▍      | 55187945/159344695 [00:24<00:46, 2245585.36it/s][A
 35%|███▍      | 55412520/159344695 [00:24<00:46, 2242283.61it/s][A
 35%|███▍      | 55636761/159344695 [00:24<00:46, 2235322.78it/s][A
 35%|███▌      | 55860307/159344695 [00:25<00:46, 2229420.14it/s][A
 35%|███▌      | 56083263/159344695 [00:25<00:46, 2223729.11it/s][A
 35%|███▌      | 56306259/159344695 [00:25<00:46, 2225592.17it/s][A
 35%|███▌      | 56528828/159344695 [00:25<00:46, 2224917.20it/s][A
 36%|███▌      | 56751327/159344695 [00:25<00:46, 2217079.15it/s][A
 36%|███▌      | 56975763/15934469

 67%|██████▋   | 106042383/159344695 [00:47<00:24, 2192008.38it/s][A
 67%|██████▋   | 106261618/159344695 [00:48<00:24, 2159415.70it/s][A
 67%|██████▋   | 106479623/159344695 [00:48<00:24, 2165563.94it/s][A
 67%|██████▋   | 106699423/159344695 [00:48<00:24, 2175189.79it/s][A
 67%|██████▋   | 106917020/159344695 [00:48<00:24, 2173512.71it/s][A
 67%|██████▋   | 107135396/159344695 [00:48<00:23, 2176575.86it/s][A
 67%|██████▋   | 107353093/159344695 [00:48<00:24, 2162025.85it/s][A
 68%|██████▊   | 107571405/159344695 [00:48<00:23, 2168308.71it/s][A
 68%|██████▊   | 107792868/159344695 [00:48<00:23, 2181997.89it/s][A
 68%|██████▊   | 108013864/159344695 [00:48<00:23, 2190309.46it/s][A
 68%|██████▊   | 108232936/159344695 [00:48<00:23, 2182530.58it/s][A
 68%|██████▊   | 108452428/159344695 [00:49<00:23, 2186228.65it/s][A
 68%|██████▊   | 108676845/159344695 [00:49<00:22, 2203291.97it/s][A
 68%|██████▊   | 108897462/159344695 [00:49<00:22, 2204152.07it/s][A
 68%|██████▊   | 109

 99%|█████████▉| 157459675/159344695 [01:11<00:00, 2177044.97it/s][A
 99%|█████████▉| 157681326/159344695 [01:11<00:00, 2188733.65it/s][A
 99%|█████████▉| 157900901/159344695 [01:11<00:00, 2190832.91it/s][A
 99%|█████████▉| 158120004/159344695 [01:11<00:00, 2186343.86it/s][A
 99%|█████████▉| 158343066/159344695 [01:11<00:00, 2199438.56it/s][A
100%|█████████▉| 158563382/159344695 [01:12<00:00, 2200551.15it/s][A
100%|█████████▉| 158783635/159344695 [01:12<00:00, 2201141.92it/s][A
100%|█████████▉| 159003764/159344695 [01:12<00:00, 2194489.71it/s][A
100%|██████████| 159344695/159344695 [01:12<00:00, 2201845.49it/s][A
 50%|█████     | 2/4 [02:44<02:10, 65.27s/it]
  0%|          | 0/481715698 [00:00<?, ?it/s][A
  0%|          | 175471/481715698 [00:00<04:34, 1754703.31it/s][A
  0%|          | 359757/481715698 [00:00<04:30, 1780251.08it/s][A
  0%|          | 562728/481715698 [00:00<04:20, 1848403.07it/s][A
  0%|          | 763532/481715698 [00:00<04:13, 1893563.17it/s][A
  0%|  

  9%|▉         | 45705800/481715698 [00:23<03:37, 2009215.34it/s][A
 10%|▉         | 45920641/481715698 [00:23<03:32, 2049041.50it/s][A
 10%|▉         | 46132956/481715698 [00:23<03:30, 2070722.77it/s][A
 10%|▉         | 46340385/481715698 [00:23<03:36, 2009705.85it/s][A
 10%|▉         | 46550012/481715698 [00:23<03:33, 2034914.16it/s][A
 10%|▉         | 46763754/481715698 [00:23<03:30, 2064617.64it/s][A
 10%|▉         | 46974974/481715698 [00:23<03:29, 2078663.37it/s][A
 10%|▉         | 47191215/481715698 [00:23<03:26, 2103094.44it/s][A
 10%|▉         | 47401841/481715698 [00:23<03:27, 2098047.47it/s][A
 10%|▉         | 47613289/481715698 [00:24<03:26, 2102946.46it/s][A
 10%|▉         | 47825939/481715698 [00:24<03:25, 2109956.65it/s][A
 10%|▉         | 48037052/481715698 [00:24<03:25, 2107724.66it/s][A
 10%|█         | 48247908/481715698 [00:24<03:26, 2098819.56it/s][A
 10%|█         | 48458023/481715698 [00:24<03:26, 2099515.75it/s][A
 10%|█         | 48673646/48171569

 20%|█▉        | 96166340/481715698 [00:47<03:02, 2116707.24it/s][A
 20%|██        | 96382573/481715698 [00:47<03:00, 2130189.73it/s][A
 20%|██        | 96598403/481715698 [00:47<03:00, 2138543.20it/s][A
 20%|██        | 96812401/481715698 [00:47<03:06, 2059634.83it/s][A
 20%|██        | 97024916/481715698 [00:47<03:05, 2078860.41it/s][A
 20%|██        | 97241031/481715698 [00:47<03:02, 2102879.34it/s][A
 20%|██        | 97453325/481715698 [00:47<03:02, 2108855.22it/s][A
 20%|██        | 97668867/481715698 [00:47<03:00, 2122609.63it/s][A
 20%|██        | 97885453/481715698 [00:47<02:59, 2135401.37it/s][A
 20%|██        | 98099184/481715698 [00:48<03:04, 2083755.79it/s][A
 20%|██        | 98313834/481715698 [00:48<03:02, 2102188.13it/s][A
 20%|██        | 98529469/481715698 [00:48<03:00, 2118146.46it/s][A
 20%|██        | 98745757/481715698 [00:48<02:59, 2131368.61it/s][A
 21%|██        | 98959093/481715698 [00:48<03:00, 2115836.76it/s][A
 21%|██        | 99176718/48171569

 30%|███       | 146311990/481715698 [01:10<02:37, 2126856.61it/s][A
 30%|███       | 146524763/481715698 [01:10<02:42, 2062146.56it/s][A
 30%|███       | 146737045/481715698 [01:10<02:41, 2079980.40it/s][A
 31%|███       | 146951226/481715698 [01:11<02:39, 2098148.82it/s][A
 31%|███       | 147169488/481715698 [01:11<02:37, 2122794.70it/s][A
 31%|███       | 147386748/481715698 [01:11<02:36, 2137493.96it/s][A
 31%|███       | 147600719/481715698 [01:11<02:36, 2137642.49it/s][A
 31%|███       | 147814639/481715698 [01:11<02:40, 2075016.19it/s][A
 31%|███       | 148028834/481715698 [01:11<02:39, 2094652.06it/s][A
 31%|███       | 148241391/481715698 [01:11<02:38, 2103831.69it/s][A
 31%|███       | 148454668/481715698 [01:11<02:37, 2112428.00it/s][A
 31%|███       | 148666123/481715698 [01:11<02:37, 2112516.65it/s][A
 31%|███       | 148877523/481715698 [01:11<02:37, 2108115.20it/s][A
 31%|███       | 149089499/481715698 [01:12<02:37, 2111594.28it/s][A
 31%|███       | 149

 41%|████      | 196426898/481715698 [01:34<02:14, 2117931.03it/s][A
 41%|████      | 196642118/481715698 [01:34<02:13, 2128095.68it/s][A
 41%|████      | 196858737/481715698 [01:34<02:13, 2139380.13it/s][A
 41%|████      | 197073626/481715698 [01:34<02:12, 2142221.80it/s][A
 41%|████      | 197290326/481715698 [01:34<02:12, 2149593.12it/s][A
 41%|████      | 197505747/481715698 [01:34<02:12, 2150973.64it/s][A
 41%|████      | 197720891/481715698 [01:35<02:12, 2143545.96it/s][A
 41%|████      | 197935284/481715698 [01:35<02:12, 2142932.28it/s][A
 41%|████      | 198149604/481715698 [01:35<02:12, 2137386.26it/s][A
 41%|████      | 198365708/481715698 [01:35<02:12, 2144426.92it/s][A
 41%|████      | 198583012/481715698 [01:35<02:11, 2152928.97it/s][A
 41%|████▏     | 198798328/481715698 [01:35<02:12, 2140532.66it/s][A
 41%|████▏     | 199012414/481715698 [01:35<02:12, 2139390.59it/s][A
 41%|████▏     | 199227799/481715698 [01:35<02:11, 2143704.02it/s][A
 41%|████▏     | 199

 51%|█████     | 246492041/481715698 [01:58<01:50, 2132685.24it/s][A
 51%|█████     | 246705336/481715698 [01:58<01:50, 2132165.98it/s][A
 51%|█████▏    | 246920881/481715698 [01:58<01:49, 2139097.29it/s][A
 51%|█████▏    | 247134809/481715698 [01:58<01:50, 2115270.55it/s][A
 51%|█████▏    | 247347303/481715698 [01:58<01:50, 2118159.72it/s][A
 51%|█████▏    | 247560831/481715698 [01:58<01:50, 2123266.00it/s][A
 51%|█████▏    | 247773197/481715698 [01:58<01:50, 2111722.83it/s][A
 51%|█████▏    | 247990374/481715698 [01:58<01:49, 2129384.52it/s][A
 52%|█████▏    | 248203377/481715698 [01:58<01:49, 2125253.44it/s][A
 52%|█████▏    | 248416691/481715698 [01:58<01:49, 2127610.85it/s][A
 52%|█████▏    | 248629486/481715698 [01:59<01:49, 2127580.81it/s][A
 52%|█████▏    | 248842268/481715698 [01:59<01:49, 2121488.43it/s][A
 52%|█████▏    | 249057431/481715698 [01:59<01:49, 2130439.44it/s][A
 52%|█████▏    | 249272359/481715698 [01:59<01:48, 2136054.41it/s][A
 52%|█████▏    | 249

 62%|██████▏   | 296607992/481715698 [02:21<01:26, 2145290.18it/s][A
 62%|██████▏   | 296823823/481715698 [02:21<01:26, 2149177.16it/s][A
 62%|██████▏   | 297038755/481715698 [02:21<01:26, 2141427.85it/s][A
 62%|██████▏   | 297252915/481715698 [02:22<01:26, 2134827.57it/s][A
 62%|██████▏   | 297466414/481715698 [02:22<01:26, 2118592.46it/s][A
 62%|██████▏   | 297678708/481715698 [02:22<01:26, 2119893.94it/s][A
 62%|██████▏   | 297891588/481715698 [02:22<01:26, 2122555.50it/s][A
 62%|██████▏   | 298103864/481715698 [02:22<01:26, 2119537.13it/s][A
 62%|██████▏   | 298317049/481715698 [02:22<01:26, 2123215.12it/s][A
 62%|██████▏   | 298529482/481715698 [02:22<01:26, 2123547.03it/s][A
 62%|██████▏   | 298743750/481715698 [02:22<01:25, 2129250.04it/s][A
 62%|██████▏   | 298960439/481715698 [02:22<01:25, 2140400.07it/s][A
 62%|██████▏   | 299178851/481715698 [02:22<01:24, 2153330.25it/s][A
 62%|██████▏   | 299397415/481715698 [02:23<01:24, 2162921.51it/s][A
 62%|██████▏   | 299

 72%|███████▏  | 346791071/481715698 [02:45<01:03, 2140911.73it/s][A
 72%|███████▏  | 347005219/481715698 [02:45<01:07, 1986083.27it/s][A
 72%|███████▏  | 347214766/481715698 [02:45<01:06, 2017680.24it/s][A
 72%|███████▏  | 347426360/481715698 [02:45<01:05, 2046185.56it/s][A
 72%|███████▏  | 347639568/481715698 [02:45<01:04, 2071217.45it/s][A
 72%|███████▏  | 347854709/481715698 [02:45<01:03, 2094638.07it/s][A
 72%|███████▏  | 348069450/481715698 [02:46<01:03, 2110194.40it/s][A
 72%|███████▏  | 348284795/481715698 [02:46<01:02, 2122986.61it/s][A
 72%|███████▏  | 348499031/481715698 [02:46<01:02, 2128760.83it/s][A
 72%|███████▏  | 348715998/481715698 [02:46<01:02, 2140866.75it/s][A
 72%|███████▏  | 348930605/481715698 [02:46<01:01, 2142422.63it/s][A
 72%|███████▏  | 349145776/481715698 [02:46<01:01, 2145197.96it/s][A
 73%|███████▎  | 349360404/481715698 [02:46<01:02, 2123023.73it/s][A
 73%|███████▎  | 349573032/481715698 [02:46<01:02, 2123997.13it/s][A
 73%|███████▎  | 349

 82%|████████▏ | 396661882/481715698 [03:08<00:43, 1934807.74it/s][A
 82%|████████▏ | 396855418/481715698 [03:09<00:44, 1923294.27it/s][A
 82%|████████▏ | 397048281/481715698 [03:09<00:43, 1924891.06it/s][A
 82%|████████▏ | 397244059/481715698 [03:09<00:43, 1934621.17it/s][A
 83%|████████▎ | 397437560/481715698 [03:09<00:44, 1913490.54it/s][A
 83%|████████▎ | 397634730/481715698 [03:09<00:43, 1930587.11it/s][A
 83%|████████▎ | 397827881/481715698 [03:09<00:43, 1929287.70it/s][A
 83%|████████▎ | 398020875/481715698 [03:09<00:43, 1916176.20it/s][A
 83%|████████▎ | 398216006/481715698 [03:09<00:43, 1926581.96it/s][A
 83%|████████▎ | 398408724/481715698 [03:09<00:43, 1922383.47it/s][A
 83%|████████▎ | 398605974/481715698 [03:10<00:42, 1937148.16it/s][A
 83%|████████▎ | 398800409/481715698 [03:10<00:42, 1939299.51it/s][A
 83%|████████▎ | 398994380/481715698 [03:10<00:42, 1938645.41it/s][A
 83%|████████▎ | 399191679/481715698 [03:10<00:42, 1948821.75it/s][A
 83%|████████▎ | 399

 92%|█████████▏| 442740553/481715698 [03:32<00:19, 1975349.48it/s][A
 92%|█████████▏| 442938119/481715698 [03:32<00:19, 1971986.50it/s][A
 92%|█████████▏| 443139132/481715698 [03:32<00:19, 1983275.84it/s][A
 92%|█████████▏| 443337491/481715698 [03:32<00:19, 1975302.77it/s][A
 92%|█████████▏| 443540274/481715698 [03:33<00:19, 1990772.13it/s][A
 92%|█████████▏| 443739401/481715698 [03:33<00:19, 1976823.33it/s][A
 92%|█████████▏| 443937141/481715698 [03:33<00:19, 1967763.04it/s][A
 92%|█████████▏| 444134187/481715698 [03:33<00:19, 1968570.51it/s][A
 92%|█████████▏| 444331079/481715698 [03:33<00:19, 1963553.13it/s][A
 92%|█████████▏| 444528463/481715698 [03:33<00:18, 1966625.67it/s][A
 92%|█████████▏| 444728860/481715698 [03:33<00:18, 1977679.77it/s][A
 92%|█████████▏| 444926657/481715698 [03:33<00:18, 1964376.01it/s][A
 92%|█████████▏| 445124772/481715698 [03:33<00:18, 1969376.07it/s][A
 92%|█████████▏| 445321742/481715698 [03:33<00:18, 1968488.33it/s][A
 92%|█████████▏| 445

  1%|          | 7862132/954997913 [00:03<07:07, 2215239.12it/s][A
  1%|          | 8085247/954997913 [00:03<07:06, 2219987.56it/s][A
  1%|          | 8307532/954997913 [00:03<07:06, 2220844.57it/s][A
  1%|          | 8529621/954997913 [00:03<07:06, 2217363.38it/s][A
  1%|          | 8752064/954997913 [00:04<07:06, 2219476.11it/s][A
  1%|          | 8974015/954997913 [00:04<07:07, 2214973.37it/s][A
  1%|          | 9196308/954997913 [00:04<07:06, 2217353.42it/s][A
  1%|          | 9418155/954997913 [00:04<07:06, 2217685.74it/s][A
  1%|          | 9639927/954997913 [00:04<07:07, 2213082.49it/s][A
  1%|          | 9862483/954997913 [00:04<07:06, 2216808.50it/s][A
  1%|          | 10084169/954997913 [00:04<07:06, 2214398.15it/s][A
  1%|          | 10305613/954997913 [00:04<07:07, 2210157.38it/s][A
  1%|          | 10526634/954997913 [00:04<07:08, 2204382.69it/s][A
  1%|          | 10747079/954997913 [00:04<07:08, 2201650.92it/s][A
  1%|          | 10967408/954997913 [00:05<0

  6%|▋         | 59908410/954997913 [00:27<06:45, 2206733.40it/s][A
  6%|▋         | 60129110/954997913 [00:27<06:45, 2205957.32it/s][A
  6%|▋         | 60350800/954997913 [00:27<06:44, 2209226.24it/s][A
  6%|▋         | 60571987/954997913 [00:27<06:44, 2210017.76it/s][A
  6%|▋         | 60794481/954997913 [00:27<06:43, 2214472.43it/s][A
  6%|▋         | 61017153/954997913 [00:27<06:43, 2218128.42it/s][A
  6%|▋         | 61241794/954997913 [00:27<06:41, 2226536.89it/s][A
  6%|▋         | 61465216/954997913 [00:28<06:40, 2228833.73it/s][A
  6%|▋         | 61688109/954997913 [00:28<06:41, 2227340.27it/s][A
  6%|▋         | 61910850/954997913 [00:28<06:41, 2224240.55it/s][A
  7%|▋         | 62133280/954997913 [00:28<06:43, 2214551.04it/s][A
  7%|▋         | 62356087/954997913 [00:28<06:42, 2218586.94it/s][A
  7%|▋         | 62577958/954997913 [00:28<06:43, 2212044.47it/s][A
  7%|▋         | 62802513/954997913 [00:28<06:41, 2221988.16it/s][A
  7%|▋         | 63024732/95499791

 12%|█▏        | 112093174/954997913 [00:50<06:19, 2221531.32it/s][A
 12%|█▏        | 112315336/954997913 [00:51<06:21, 2209074.90it/s][A
 12%|█▏        | 112537539/954997913 [00:51<06:20, 2212944.60it/s][A
 12%|█▏        | 112758851/954997913 [00:51<06:21, 2209049.56it/s][A
 12%|█▏        | 112980073/954997913 [00:51<06:21, 2209998.79it/s][A
 12%|█▏        | 113201083/954997913 [00:51<06:20, 2209693.15it/s][A
 12%|█▏        | 113422059/954997913 [00:51<06:21, 2208524.38it/s][A
 12%|█▏        | 113642917/954997913 [00:51<06:22, 2200146.33it/s][A
 12%|█▏        | 113863215/954997913 [00:51<06:22, 2200994.72it/s][A
 12%|█▏        | 114084835/954997913 [00:51<06:21, 2205531.84it/s][A
 12%|█▏        | 114305396/954997913 [00:51<06:21, 2201006.35it/s][A
 12%|█▏        | 114525505/954997913 [00:52<06:22, 2199886.61it/s][A
 12%|█▏        | 114745499/954997913 [00:52<06:22, 2198833.82it/s][A
 12%|█▏        | 114969860/954997913 [00:52<06:19, 2212076.95it/s][A
 12%|█▏        | 115

 17%|█▋        | 164112650/954997913 [01:14<05:56, 2219748.47it/s][A
 17%|█▋        | 164334643/954997913 [01:14<05:56, 2219573.38it/s][A
 17%|█▋        | 164557338/954997913 [01:14<05:55, 2221780.28it/s][A
 17%|█▋        | 164779526/954997913 [01:14<05:57, 2210833.71it/s][A
 17%|█▋        | 165000629/954997913 [01:14<05:59, 2199449.13it/s][A
 17%|█▋        | 165224337/954997913 [01:14<05:57, 2210602.29it/s][A
 17%|█▋        | 165450485/954997913 [01:15<05:54, 2225622.66it/s][A
 17%|█▋        | 165673094/954997913 [01:15<05:55, 2220215.74it/s][A
 17%|█▋        | 165895152/954997913 [01:15<05:57, 2209465.26it/s][A
 17%|█▋        | 166116136/954997913 [01:15<05:58, 2200718.87it/s][A
 17%|█▋        | 166336242/954997913 [01:15<05:58, 2200485.05it/s][A
 17%|█▋        | 166556315/954997913 [01:15<05:59, 2191067.52it/s][A
 17%|█▋        | 166777444/954997913 [01:15<05:58, 2197092.82it/s][A
 17%|█▋        | 166998539/954997913 [01:15<05:57, 2201229.20it/s][A
 18%|█▊        | 167

 23%|██▎       | 216049982/954997913 [01:37<05:35, 2200805.99it/s][A
 23%|██▎       | 216275235/954997913 [01:38<05:33, 2216069.55it/s][A
 23%|██▎       | 216496878/954997913 [01:38<05:33, 2214754.35it/s][A
 23%|██▎       | 216718379/954997913 [01:38<05:34, 2209525.20it/s][A
 23%|██▎       | 216939352/954997913 [01:38<05:34, 2208753.17it/s][A
 23%|██▎       | 217160242/954997913 [01:38<05:34, 2205211.26it/s][A
 23%|██▎       | 217380775/954997913 [01:38<05:35, 2199536.89it/s][A
 23%|██▎       | 217602150/954997913 [01:38<05:34, 2203779.12it/s][A
 23%|██▎       | 217822538/954997913 [01:38<05:36, 2188765.77it/s][A
 23%|██▎       | 218043020/954997913 [01:38<05:35, 2193554.94it/s][A
 23%|██▎       | 218264870/954997913 [01:39<05:34, 2200976.86it/s][A
 23%|██▎       | 218486188/954997913 [01:39<05:34, 2204622.70it/s][A
 23%|██▎       | 218709398/954997913 [01:39<05:32, 2212792.09it/s][A
 23%|██▎       | 218930882/954997913 [01:39<05:32, 2213403.53it/s][A
 23%|██▎       | 219

 28%|██▊       | 267278934/954997913 [02:01<05:18, 2157224.62it/s][A
 28%|██▊       | 267496656/954997913 [02:01<05:17, 2163183.64it/s][A
 28%|██▊       | 267712985/954997913 [02:01<05:17, 2162517.06it/s][A
 28%|██▊       | 267929244/954997913 [02:01<05:17, 2162019.98it/s][A
 28%|██▊       | 268145451/954997913 [02:01<05:19, 2151538.93it/s][A
 28%|██▊       | 268361147/954997913 [02:02<05:18, 2153159.93it/s][A
 28%|██▊       | 268578745/954997913 [02:02<05:17, 2159954.61it/s][A
 28%|██▊       | 268796738/954997913 [02:02<05:16, 2165906.22it/s][A
 28%|██▊       | 269013342/954997913 [02:02<05:16, 2164751.70it/s][A
 28%|██▊       | 269229827/954997913 [02:02<05:18, 2156148.86it/s][A
 28%|██▊       | 269448529/954997913 [02:02<05:18, 2154392.20it/s][A
 28%|██▊       | 269666232/954997913 [02:02<05:17, 2161131.50it/s][A
 28%|██▊       | 269886200/954997913 [02:02<05:15, 2172550.98it/s][A
 28%|██▊       | 270105584/954997913 [02:02<05:14, 2178891.70it/s][A
 28%|██▊       | 270

 33%|███▎      | 318130746/954997913 [02:25<04:51, 2182541.98it/s][A
 33%|███▎      | 318349654/954997913 [02:25<04:51, 2184498.34it/s][A
 33%|███▎      | 318568291/954997913 [02:25<04:51, 2185057.00it/s][A
 33%|███▎      | 318786903/954997913 [02:25<04:51, 2184507.98it/s][A
 33%|███▎      | 319005428/954997913 [02:25<04:53, 2168617.34it/s][A
 33%|███▎      | 319222369/954997913 [02:25<04:53, 2164341.16it/s][A
 33%|███▎      | 319441689/954997913 [02:25<04:52, 2172913.21it/s][A
 33%|███▎      | 319659029/954997913 [02:25<04:56, 2139445.50it/s][A
 33%|███▎      | 319873781/954997913 [02:25<04:56, 2141852.91it/s][A
 34%|███▎      | 320089350/954997913 [02:25<04:55, 2145984.43it/s][A
 34%|███▎      | 320304025/954997913 [02:26<04:55, 2144958.47it/s][A
 34%|███▎      | 320521003/954997913 [02:26<04:54, 2152341.12it/s][A
 34%|███▎      | 320737682/954997913 [02:26<04:54, 2156654.59it/s][A
 34%|███▎      | 320954668/954997913 [02:26<04:53, 2160596.84it/s][A
 34%|███▎      | 321

 39%|███▊      | 368675179/954997913 [02:48<04:34, 2134429.51it/s][A
 39%|███▊      | 368888719/954997913 [02:48<04:34, 2133134.91it/s][A
 39%|███▊      | 369105415/954997913 [02:48<04:33, 2143168.61it/s][A
 39%|███▊      | 369320300/954997913 [02:48<04:33, 2144867.42it/s][A
 39%|███▊      | 369536988/954997913 [02:49<04:32, 2151421.65it/s][A
 39%|███▊      | 369752164/954997913 [02:49<04:32, 2147942.97it/s][A
 39%|███▊      | 369966984/954997913 [02:49<04:32, 2147521.52it/s][A
 39%|███▉      | 370182279/954997913 [02:49<04:32, 2149143.20it/s][A
 39%|███▉      | 370398467/954997913 [02:49<04:31, 2152946.00it/s][A
 39%|███▉      | 370613772/954997913 [02:49<04:31, 2150406.21it/s][A
 39%|███▉      | 370828821/954997913 [02:49<04:32, 2145255.02it/s][A
 39%|███▉      | 371044583/954997913 [02:49<04:31, 2148945.63it/s][A
 39%|███▉      | 371259485/954997913 [02:49<04:36, 2111195.69it/s][A
 39%|███▉      | 371477367/954997913 [02:49<04:33, 2131035.69it/s][A
 39%|███▉      | 371

 44%|████▍     | 418972729/954997913 [03:12<04:13, 2112464.94it/s][A
 44%|████▍     | 419187381/954997913 [03:12<04:12, 2122563.55it/s][A
 44%|████▍     | 419399689/954997913 [03:12<04:12, 2118876.31it/s][A
 44%|████▍     | 419616451/954997913 [03:12<04:10, 2133265.27it/s][A
 44%|████▍     | 419829827/954997913 [03:12<04:11, 2126350.91it/s][A
 44%|████▍     | 420042502/954997913 [03:12<04:13, 2114154.78it/s][A
 44%|████▍     | 420256725/954997913 [03:12<04:11, 2122497.37it/s][A
 44%|████▍     | 420469014/954997913 [03:12<04:15, 2091216.44it/s][A
 44%|████▍     | 420683595/954997913 [03:13<04:13, 2107298.21it/s][A
 44%|████▍     | 420894446/954997913 [03:13<04:13, 2106240.64it/s][A
 44%|████▍     | 421105155/954997913 [03:13<04:15, 2089626.51it/s][A
 44%|████▍     | 421318489/954997913 [03:13<04:13, 2102548.92it/s][A
 44%|████▍     | 421533266/954997913 [03:13<04:12, 2115911.64it/s][A
 44%|████▍     | 421745475/954997913 [03:13<04:11, 2117758.95it/s][A
 44%|████▍     | 421

 49%|████▉     | 469308985/954997913 [03:35<03:46, 2142130.37it/s][A
 49%|████▉     | 469526935/954997913 [03:36<03:45, 2153205.10it/s][A
 49%|████▉     | 469742284/954997913 [03:36<03:45, 2150969.80it/s][A
 49%|████▉     | 469958002/954997913 [03:36<03:45, 2152828.18it/s][A
 49%|████▉     | 470173464/954997913 [03:36<03:45, 2153362.95it/s][A
 49%|████▉     | 470391920/954997913 [03:36<03:44, 2162627.19it/s][A
 49%|████▉     | 470608200/954997913 [03:36<03:45, 2148557.53it/s][A
 49%|████▉     | 470825857/954997913 [03:36<03:44, 2156882.82it/s][A
 49%|████▉     | 471041576/954997913 [03:36<03:44, 2151678.87it/s][A
 49%|████▉     | 471257190/954997913 [03:36<03:44, 2153014.35it/s][A
 49%|████▉     | 471472509/954997913 [03:36<03:44, 2151967.19it/s][A
 49%|████▉     | 471687718/954997913 [03:37<03:44, 2151367.62it/s][A
 49%|████▉     | 471902864/954997913 [03:37<03:47, 2127098.08it/s][A
 49%|████▉     | 472118884/954997913 [03:37<03:45, 2136919.23it/s][A
 49%|████▉     | 472

 54%|█████▍    | 519601096/954997913 [03:59<03:21, 2157578.51it/s][A
 54%|█████▍    | 519816865/954997913 [03:59<03:22, 2151287.12it/s][A
 54%|█████▍    | 520032006/954997913 [03:59<03:22, 2145514.86it/s][A
 54%|█████▍    | 520246570/954997913 [03:59<03:23, 2141076.87it/s][A
 54%|█████▍    | 520460689/954997913 [04:00<03:23, 2136841.26it/s][A
 55%|█████▍    | 520674383/954997913 [04:00<03:23, 2136738.53it/s][A
 55%|█████▍    | 520890904/954997913 [04:00<03:22, 2145200.13it/s][A
 55%|█████▍    | 521105437/954997913 [04:00<03:23, 2136899.41it/s][A
 55%|█████▍    | 521324594/954997913 [04:00<03:21, 2153009.61it/s][A
 55%|█████▍    | 521542055/954997913 [04:00<03:20, 2159442.08it/s][A
 55%|█████▍    | 521761094/954997913 [04:00<03:19, 2168631.78it/s][A
 55%|█████▍    | 521977990/954997913 [04:00<03:20, 2159898.17it/s][A
 55%|█████▍    | 522194011/954997913 [04:00<03:20, 2158189.52it/s][A
 55%|█████▍    | 522409852/954997913 [04:00<03:20, 2153225.11it/s][A
 55%|█████▍    | 522

 60%|█████▉    | 569926607/954997913 [04:23<03:03, 2093151.56it/s][A
 60%|█████▉    | 570141384/954997913 [04:23<03:02, 2109240.79it/s][A
 60%|█████▉    | 570358621/954997913 [04:23<03:00, 2127788.53it/s][A
 60%|█████▉    | 570571792/954997913 [04:23<03:02, 2106084.41it/s][A
 60%|█████▉    | 570782864/954997913 [04:23<03:02, 2107472.04it/s][A
 60%|█████▉    | 570994031/954997913 [04:23<03:02, 2108727.27it/s][A
 60%|█████▉    | 571211579/954997913 [04:23<03:00, 2128315.09it/s][A
 60%|█████▉    | 571424566/954997913 [04:23<03:10, 2010427.07it/s][A
 60%|█████▉    | 571636455/954997913 [04:24<03:07, 2041779.58it/s][A
 60%|█████▉    | 571849728/954997913 [04:24<03:05, 2068235.92it/s][A
 60%|█████▉    | 572061358/954997913 [04:24<03:03, 2082423.46it/s][A
 60%|█████▉    | 572275312/954997913 [04:24<03:02, 2099233.28it/s][A
 60%|█████▉    | 572489100/954997913 [04:24<03:01, 2110677.40it/s][A
 60%|█████▉    | 572704777/954997913 [04:24<02:59, 2124294.59it/s][A
 60%|█████▉    | 572

 65%|██████▍   | 620285870/954997913 [04:46<02:36, 2136999.99it/s][A
 65%|██████▍   | 620502265/954997913 [04:46<02:35, 2145013.33it/s][A
 65%|██████▍   | 620716779/954997913 [04:47<02:36, 2140987.90it/s][A
 65%|██████▌   | 620932821/954997913 [04:47<02:35, 2146776.76it/s][A
 65%|██████▌   | 621150017/954997913 [04:47<02:34, 2154267.75it/s][A
 65%|██████▌   | 621366298/954997913 [04:47<02:34, 2156820.87it/s][A
 65%|██████▌   | 621582687/954997913 [04:47<02:34, 2158935.83it/s][A
 65%|██████▌   | 621798589/954997913 [04:47<02:35, 2149472.98it/s][A
 65%|██████▌   | 622013552/954997913 [04:47<02:35, 2143847.41it/s][A
 65%|██████▌   | 622227951/954997913 [04:47<02:35, 2142173.28it/s][A
 65%|██████▌   | 622446148/954997913 [04:47<02:34, 2153958.12it/s][A
 65%|██████▌   | 622661567/954997913 [04:47<02:34, 2151212.74it/s][A
 65%|██████▌   | 622876705/954997913 [04:48<02:35, 2141592.36it/s][A
 65%|██████▌   | 623090886/954997913 [04:48<02:35, 2135652.41it/s][A
 65%|██████▌   | 623

 70%|███████   | 670471444/954997913 [05:10<02:13, 2130670.38it/s][A
 70%|███████   | 670684608/954997913 [05:10<02:13, 2130958.74it/s][A
 70%|███████   | 670897730/954997913 [05:10<02:13, 2123122.06it/s][A
 70%|███████   | 671110996/954997913 [05:10<02:13, 2125972.05it/s][A
 70%|███████   | 671323787/954997913 [05:10<02:13, 2126550.63it/s][A
 70%|███████   | 671540782/954997913 [05:11<02:12, 2139384.65it/s][A
 70%|███████   | 671756281/954997913 [05:11<02:12, 2144040.03it/s][A
 70%|███████   | 671973742/954997913 [05:11<02:11, 2153117.95it/s][A
 70%|███████   | 672189078/954997913 [05:11<02:11, 2150763.86it/s][A
 70%|███████   | 672406524/954997913 [05:11<02:10, 2157817.38it/s][A
 70%|███████   | 672622324/954997913 [05:11<02:10, 2155968.43it/s][A
 70%|███████   | 672837934/954997913 [05:11<02:11, 2151547.42it/s][A
 70%|███████   | 673053100/954997913 [05:11<02:11, 2147048.62it/s][A
 70%|███████   | 673267815/954997913 [05:11<02:11, 2144211.80it/s][A
 71%|███████   | 673

 75%|███████▌  | 720750823/954997913 [05:34<01:51, 2109331.82it/s][A
 75%|███████▌  | 720961820/954997913 [05:34<01:51, 2097513.80it/s][A
 76%|███████▌  | 721177440/954997913 [05:34<01:50, 2114779.10it/s][A
 76%|███████▌  | 721388994/954997913 [05:34<01:51, 2103022.47it/s][A
 76%|███████▌  | 721601921/954997913 [05:34<01:50, 2110827.63it/s][A
 76%|███████▌  | 721819421/954997913 [05:34<01:49, 2129675.79it/s][A
 76%|███████▌  | 722032467/954997913 [05:34<01:49, 2122790.91it/s][A
 76%|███████▌  | 722245487/954997913 [05:34<01:49, 2125004.28it/s][A
 76%|███████▌  | 722458030/954997913 [05:34<01:50, 2096321.04it/s][A
 76%|███████▌  | 722671764/954997913 [05:35<01:50, 2108459.56it/s][A
 76%|███████▌  | 722886184/954997913 [05:35<01:49, 2119055.06it/s][A
 76%|███████▌  | 723098172/954997913 [05:35<01:50, 2101360.52it/s][A
 76%|███████▌  | 723312646/954997913 [05:35<01:49, 2114180.64it/s][A
 76%|███████▌  | 723524147/954997913 [05:35<01:50, 2089236.37it/s][A
 76%|███████▌  | 723

 81%|████████  | 771016317/954997913 [05:57<01:26, 2137621.23it/s][A
 81%|████████  | 771231220/954997913 [05:57<01:25, 2141030.20it/s][A
 81%|████████  | 771446505/954997913 [05:57<01:25, 2144561.58it/s][A
 81%|████████  | 771661835/954997913 [05:58<01:25, 2147171.67it/s][A
 81%|████████  | 771879596/954997913 [05:58<01:24, 2156212.53it/s][A
 81%|████████  | 772096521/954997913 [05:58<01:24, 2160106.36it/s][A
 81%|████████  | 772313206/954997913 [05:58<01:24, 2162124.12it/s][A
 81%|████████  | 772529426/954997913 [05:58<01:24, 2152663.15it/s][A
 81%|████████  | 772747000/954997913 [05:58<01:24, 2159533.72it/s][A
 81%|████████  | 772962969/954997913 [05:58<01:24, 2150645.91it/s][A
 81%|████████  | 773178053/954997913 [05:58<01:24, 2150151.13it/s][A
 81%|████████  | 773393082/954997913 [05:58<01:24, 2149495.44it/s][A
 81%|████████  | 773608104/954997913 [05:59<01:24, 2149710.30it/s][A
 81%|████████  | 773823082/954997913 [05:59<01:24, 2144533.59it/s][A
 81%|████████  | 774

 86%|████████▌ | 817325668/954997913 [06:21<01:12, 1908484.30it/s][A
 86%|████████▌ | 817520409/954997913 [06:21<01:11, 1919996.82it/s][A
 86%|████████▌ | 817716915/954997913 [06:21<01:11, 1933295.08it/s][A
 86%|████████▌ | 817911080/954997913 [06:21<01:10, 1935791.79it/s][A
 86%|████████▌ | 818104838/954997913 [06:21<01:12, 1894592.06it/s][A
 86%|████████▌ | 818301810/954997913 [06:21<01:11, 1916520.92it/s][A
 86%|████████▌ | 818496600/954997913 [06:22<01:10, 1925825.80it/s][A
 86%|████████▌ | 818691012/954997913 [06:22<01:10, 1931276.98it/s][A
 86%|████████▌ | 818884291/954997913 [06:22<01:13, 1864146.49it/s][A
 86%|████████▌ | 819085639/954997913 [06:22<01:11, 1906566.96it/s][A
 86%|████████▌ | 819279079/954997913 [06:22<01:10, 1914830.24it/s][A
 86%|████████▌ | 819477867/954997913 [06:22<01:09, 1936174.35it/s][A
 86%|████████▌ | 819675913/954997913 [06:22<01:09, 1949249.86it/s][A
 86%|████████▌ | 819876517/954997913 [06:22<01:08, 1965945.56it/s][A
 86%|████████▌ | 820

 90%|█████████ | 863696495/954997913 [06:45<00:46, 1975965.77it/s][A
 90%|█████████ | 863897027/954997913 [06:45<00:45, 1984679.15it/s][A
 90%|█████████ | 864095540/954997913 [06:45<00:45, 1984159.40it/s][A
 91%|█████████ | 864293988/954997913 [06:45<00:45, 1984251.88it/s][A
 91%|█████████ | 864492436/954997913 [06:45<00:45, 1981575.72it/s][A
 91%|█████████ | 864691492/954997913 [06:45<00:45, 1984261.63it/s][A
 91%|█████████ | 864889931/954997913 [06:45<00:45, 1971835.04it/s][A
 91%|█████████ | 865090048/954997913 [06:45<00:45, 1980543.98it/s][A
 91%|█████████ | 865288130/954997913 [06:45<00:45, 1978820.11it/s][A
 91%|█████████ | 865486032/954997913 [06:45<00:45, 1971669.14it/s][A
 91%|█████████ | 865683219/954997913 [06:46<00:45, 1966056.21it/s][A
 91%|█████████ | 865880055/954997913 [06:46<00:45, 1966744.53it/s][A
 91%|█████████ | 866082137/954997913 [06:46<00:44, 1982640.53it/s][A
 91%|█████████ | 866280441/954997913 [06:46<00:44, 1973221.48it/s][A
 91%|█████████ | 866

 95%|█████████▌| 909990778/954997913 [07:08<00:22, 1970314.35it/s][A
 95%|█████████▌| 910191657/954997913 [07:08<00:22, 1981699.15it/s][A
 95%|█████████▌| 910389850/954997913 [07:08<00:22, 1961504.60it/s][A
 95%|█████████▌| 910586064/954997913 [07:09<00:22, 1956982.47it/s][A
 95%|█████████▌| 910781809/954997913 [07:09<00:22, 1951219.38it/s][A
 95%|█████████▌| 910977450/954997913 [07:09<00:22, 1952770.05it/s][A
 95%|█████████▌| 911172753/954997913 [07:09<00:22, 1949445.29it/s][A
 95%|█████████▌| 911372583/954997913 [07:09<00:22, 1963846.81it/s][A
 95%|█████████▌| 911570421/954997913 [07:09<00:22, 1968183.46it/s][A
 95%|█████████▌| 911767608/954997913 [07:09<00:21, 1969287.13it/s][A
 95%|█████████▌| 911964558/954997913 [07:09<00:21, 1966259.16it/s][A
 96%|█████████▌| 912163101/954997913 [07:09<00:21, 1971970.58it/s][A
 96%|█████████▌| 912361983/954997913 [07:09<00:21, 1976984.67it/s][A
 96%|█████████▌| 912562913/954997913 [07:10<00:21, 1986568.76it/s][A
 96%|█████████▌| 912