# Preprocess corpus

In [None]:
import pandas as pd

class ProcessCorpus:
    def __init__(self, filepath='/kaggle/input/bkai-ai-track2-legal-document-retrieval/Legal Document Retrieval/corpus.csv'):
        self.df = pd.read_csv(filepath, usecols=['text', 'cid'])

    def process_text(self):
        self.df['text'] = self.df['text'].apply(lambda x: ' '.join(x.split()))
        self.save_to_csv()

    def save_to_csv(self):
        output_path = 'preprocessed_corpus.csv'
        self.df.to_csv(output_path, index=False)


if __name__ == "__main__":
    processor = ProcessCorpus()
    processor.process_text()

# Encode 
- preprocessed_corpus.csv to encoded_corpus.json
- public_test.csv to encoded_public_test.json
- train.csv to encoded_train_full.json

In [2]:
import pandas as pd
import numpy as np
import torch
!pip install sentence_transformers
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Encode corpus.csv
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

df = pd.read_csv('/kaggle/input/preprocessed-corpus/preprocessed_corpus.csv')

model = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')
model = model.to(device)

def encode(lst = [], convert_to_tensor=True, batch_size=128):
    vectors = []
    with tqdm(total = len(lst), desc="Encoding texts") as pbar:
        # Process in batches
        for i in range(0, len(lst), batch_size):
            batch = lst[i:i + batch_size]
            encoded_batch = model.encode(batch, convert_to_tensor=True, device=device)
            # Move to CPU before converting to numpy
            if torch.cuda.is_available():
                encoded_batch = encoded_batch.cpu()
            vectors.extend([np.array(arr) for arr in encoded_batch.numpy()])
            pbar.update(len(batch))
    return vectors

df['vector'] = encode(lst=list(df['text']))

df.to_json('encoded_corpus.json')


  pid, fd = os.forkpty()




Encoding texts:   0%|          | 0/261597 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 128/261597 [00:01<44:54, 97.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 256/261597 [00:02<35:52, 121.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 384/261597 [00:03<32:43, 133.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 512/261597 [00:03<31:08, 139.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 640/261597 [00:04<30:19, 143.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 768/261597 [00:05<30:42, 141.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 896/261597 [00:06<30:32, 142.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 1024/261597 [00:07<30:30, 142.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 1152/261597 [00:08<31:09, 139.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 1280/261597 [00:09<30:26, 142.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1408/261597 [00:10<30:03, 144.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1536/261597 [00:11<30:53, 140.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1664/261597 [00:11<30:01, 144.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1792/261597 [00:12<29:19, 147.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1920/261597 [00:13<28:52, 149.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2048/261597 [00:14<29:32, 146.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2176/261597 [00:15<29:00, 149.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2304/261597 [00:16<28:53, 149.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2432/261597 [00:17<29:16, 147.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2560/261597 [00:17<29:27, 146.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2688/261597 [00:18<29:01, 148.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2816/261597 [00:19<28:37, 150.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2944/261597 [00:20<28:01, 153.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 3072/261597 [00:21<28:20, 152.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 3200/261597 [00:22<28:59, 148.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3328/261597 [00:23<29:15, 147.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3456/261597 [00:23<29:41, 144.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3584/261597 [00:24<29:18, 146.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3712/261597 [00:25<28:55, 148.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3840/261597 [00:26<28:58, 148.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 3968/261597 [00:27<29:15, 146.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4096/261597 [00:28<29:20, 146.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4224/261597 [00:29<29:39, 144.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4352/261597 [00:30<30:19, 141.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4480/261597 [00:30<28:56, 148.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4608/261597 [00:31<27:55, 153.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4736/261597 [00:32<27:43, 154.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4864/261597 [00:33<27:46, 154.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4992/261597 [00:34<28:19, 150.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5120/261597 [00:35<28:41, 148.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5248/261597 [00:36<29:02, 147.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5376/261597 [00:36<28:54, 147.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5504/261597 [00:37<29:51, 142.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5632/261597 [00:38<29:46, 143.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5760/261597 [00:39<29:12, 145.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5888/261597 [00:40<28:33, 149.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6016/261597 [00:41<28:55, 147.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6144/261597 [00:42<29:10, 145.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6272/261597 [00:43<29:56, 142.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6400/261597 [00:44<30:14, 140.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6528/261597 [00:44<30:29, 139.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 6656/261597 [00:45<29:35, 143.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 6784/261597 [00:46<28:57, 146.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 6912/261597 [00:47<29:46, 142.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7040/261597 [00:48<29:36, 143.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7168/261597 [00:49<29:18, 144.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7296/261597 [00:50<29:36, 143.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7424/261597 [00:51<30:01, 141.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7552/261597 [00:52<29:08, 145.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7680/261597 [00:52<28:56, 146.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7808/261597 [00:53<29:52, 141.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7936/261597 [00:54<30:13, 139.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8064/261597 [00:55<29:29, 143.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8192/261597 [00:56<29:13, 144.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8320/261597 [00:57<29:01, 145.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8448/261597 [00:58<29:55, 140.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8576/261597 [00:59<31:18, 134.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8704/261597 [01:00<30:34, 137.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8832/261597 [01:01<29:17, 143.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8960/261597 [01:01<29:26, 142.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 9088/261597 [01:02<29:58, 140.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9216/261597 [01:03<30:17, 138.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9344/261597 [01:04<29:05, 144.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9472/261597 [01:05<29:20, 143.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9600/261597 [01:06<29:39, 141.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9728/261597 [01:07<31:50, 131.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 9856/261597 [01:08<30:27, 137.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 9984/261597 [01:09<30:10, 138.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10112/261597 [01:10<29:38, 141.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10240/261597 [01:11<29:27, 142.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10368/261597 [01:12<29:11, 143.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10496/261597 [01:12<29:26, 142.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10624/261597 [01:13<29:09, 143.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10752/261597 [01:14<29:16, 142.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10880/261597 [01:15<28:47, 145.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11008/261597 [01:16<28:55, 144.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11136/261597 [01:17<29:48, 140.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11264/261597 [01:18<30:10, 138.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11392/261597 [01:19<29:23, 141.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11520/261597 [01:20<30:07, 138.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11648/261597 [01:21<29:19, 142.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 11776/261597 [01:21<28:53, 144.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 11904/261597 [01:22<29:30, 141.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12032/261597 [01:23<28:48, 144.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12160/261597 [01:24<29:15, 142.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12288/261597 [01:25<29:54, 138.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12416/261597 [01:26<29:47, 139.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12544/261597 [01:27<28:52, 143.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12672/261597 [01:28<28:57, 143.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12800/261597 [01:29<29:59, 138.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12928/261597 [01:30<29:44, 139.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 13056/261597 [01:31<30:09, 137.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13184/261597 [01:32<30:38, 135.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13312/261597 [01:33<31:22, 131.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13440/261597 [01:34<31:06, 132.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13568/261597 [01:35<32:10, 128.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13696/261597 [01:36<31:45, 130.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13824/261597 [01:37<32:27, 127.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13952/261597 [01:38<32:33, 126.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 14080/261597 [01:38<30:34, 134.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 14208/261597 [01:39<30:52, 133.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 14336/261597 [01:40<30:12, 136.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14464/261597 [01:41<29:43, 138.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14592/261597 [01:42<30:10, 136.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14720/261597 [01:43<30:25, 135.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14848/261597 [01:44<30:25, 135.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14976/261597 [01:45<30:37, 134.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15104/261597 [01:46<30:11, 136.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15232/261597 [01:47<31:54, 128.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15360/261597 [01:48<30:20, 135.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15488/261597 [01:49<30:33, 134.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15616/261597 [01:50<29:15, 140.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15744/261597 [01:51<29:04, 140.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15872/261597 [01:52<31:01, 132.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 16000/261597 [01:53<30:13, 135.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 16128/261597 [01:54<29:44, 137.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 16256/261597 [01:54<29:21, 139.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16384/261597 [01:55<30:27, 134.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16512/261597 [01:56<30:55, 132.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16640/261597 [01:58<31:31, 129.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16768/261597 [01:59<31:45, 128.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16896/261597 [02:00<32:22, 125.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17024/261597 [02:01<31:34, 129.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17152/261597 [02:01<31:00, 131.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17280/261597 [02:02<30:34, 133.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17408/261597 [02:03<29:50, 136.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17536/261597 [02:04<29:15, 139.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17664/261597 [02:05<28:19, 143.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17792/261597 [02:06<28:49, 140.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17920/261597 [02:07<29:12, 139.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18048/261597 [02:08<29:32, 137.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18176/261597 [02:09<29:53, 135.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18304/261597 [02:10<28:14, 143.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18432/261597 [02:10<28:37, 141.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18560/261597 [02:11<28:53, 140.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18688/261597 [02:12<29:28, 137.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18816/261597 [02:13<28:39, 141.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18944/261597 [02:14<27:57, 144.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19072/261597 [02:15<28:38, 141.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19200/261597 [02:16<28:31, 141.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19328/261597 [02:17<27:50, 145.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19456/261597 [02:18<27:51, 144.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19584/261597 [02:19<28:54, 139.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 19712/261597 [02:20<29:53, 134.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 19840/261597 [02:21<30:15, 133.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 19968/261597 [02:22<30:31, 131.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20096/261597 [02:23<30:40, 131.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20224/261597 [02:24<30:58, 129.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20352/261597 [02:25<30:44, 130.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20480/261597 [02:26<30:57, 129.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20608/261597 [02:27<30:25, 131.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20736/261597 [02:27<29:06, 137.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20864/261597 [02:28<27:40, 144.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20992/261597 [02:29<27:53, 143.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21120/261597 [02:30<28:29, 140.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21248/261597 [02:31<29:03, 137.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21376/261597 [02:32<29:04, 137.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21504/261597 [02:33<31:25, 127.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21632/261597 [02:34<31:17, 127.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21760/261597 [02:35<30:45, 129.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21888/261597 [02:36<30:22, 131.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 22016/261597 [02:37<30:34, 130.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 22144/261597 [02:38<29:56, 133.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22272/261597 [02:39<29:47, 133.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22400/261597 [02:40<29:07, 136.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22528/261597 [02:41<29:08, 136.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22656/261597 [02:42<29:10, 136.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22784/261597 [02:43<30:47, 129.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 22912/261597 [02:44<30:14, 131.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23040/261597 [02:45<29:01, 136.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23168/261597 [02:45<28:07, 141.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23296/261597 [02:46<28:15, 140.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23424/261597 [02:47<28:57, 137.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23552/261597 [02:48<29:00, 136.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23680/261597 [02:49<28:33, 138.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23808/261597 [02:50<28:34, 138.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23936/261597 [02:51<28:30, 138.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24064/261597 [02:52<27:43, 142.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24192/261597 [02:53<28:07, 140.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24320/261597 [02:54<28:33, 138.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24448/261597 [02:55<27:37, 143.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24576/261597 [02:55<26:28, 149.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24704/261597 [02:56<26:35, 148.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24832/261597 [02:57<26:48, 147.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 24960/261597 [02:58<26:57, 146.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25088/261597 [02:59<26:57, 146.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25216/261597 [03:00<27:30, 143.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25344/261597 [03:01<30:32, 128.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25472/261597 [03:02<28:47, 136.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25600/261597 [03:03<28:54, 136.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25728/261597 [03:04<28:45, 136.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25856/261597 [03:05<28:46, 136.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25984/261597 [03:05<28:06, 139.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 26112/261597 [03:06<27:09, 144.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26240/261597 [03:07<27:34, 142.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26368/261597 [03:08<28:11, 139.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26496/261597 [03:09<28:48, 136.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26624/261597 [03:10<27:38, 141.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26752/261597 [03:11<26:52, 145.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26880/261597 [03:12<27:38, 141.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 27008/261597 [03:13<27:39, 141.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 27136/261597 [03:14<27:35, 141.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 27264/261597 [03:14<27:49, 140.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 27392/261597 [03:15<27:31, 141.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 27520/261597 [03:16<26:44, 145.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 27648/261597 [03:17<26:49, 145.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 27776/261597 [03:18<26:44, 145.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 27904/261597 [03:19<27:15, 142.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28032/261597 [03:20<27:17, 142.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28160/261597 [03:21<26:32, 146.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28288/261597 [03:21<25:43, 151.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28416/261597 [03:22<26:03, 149.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28544/261597 [03:23<26:34, 146.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28672/261597 [03:24<26:25, 146.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28800/261597 [03:25<26:12, 148.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28928/261597 [03:26<25:28, 152.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 29056/261597 [03:27<26:03, 148.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 29184/261597 [03:27<25:58, 149.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 29312/261597 [03:28<26:06, 148.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29440/261597 [03:29<26:12, 147.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29568/261597 [03:30<26:51, 143.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29696/261597 [03:31<27:19, 141.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29824/261597 [03:32<25:55, 149.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29952/261597 [03:33<24:47, 155.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 30080/261597 [03:33<25:35, 150.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30208/261597 [03:34<25:38, 150.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30336/261597 [03:35<26:24, 145.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30464/261597 [03:36<27:26, 140.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30592/261597 [03:37<27:16, 141.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30720/261597 [03:38<27:35, 139.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30848/261597 [03:39<27:53, 137.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30976/261597 [03:40<26:46, 143.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31104/261597 [03:41<26:51, 143.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31232/261597 [03:42<27:41, 138.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31360/261597 [03:43<28:42, 133.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31488/261597 [03:44<28:45, 133.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31616/261597 [03:45<26:50, 142.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31744/261597 [03:45<26:15, 145.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31872/261597 [03:46<27:19, 140.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32000/261597 [03:47<27:08, 140.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32128/261597 [03:48<27:35, 138.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32256/261597 [03:49<26:42, 143.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32384/261597 [03:50<26:55, 141.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32512/261597 [03:51<26:47, 142.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32640/261597 [03:52<26:35, 143.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 32768/261597 [03:53<27:12, 140.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 32896/261597 [03:54<27:27, 138.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33024/261597 [03:55<27:20, 139.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33152/261597 [03:55<27:26, 138.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33280/261597 [03:56<26:10, 145.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33408/261597 [03:57<25:50, 147.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33536/261597 [03:58<26:28, 143.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33664/261597 [03:59<26:23, 143.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33792/261597 [04:00<25:12, 150.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33920/261597 [04:01<25:51, 146.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33920/261597 [04:01<27:00, 140.47it/s]


KeyboardInterrupt: 

In [3]:
import pandas as pd
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Encode public_test
df = pd.read_csv('/kaggle/input/bkai-ai-track2-legal-document-retrieval/Legal Document Retrieval/public_test.csv')

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder', device=device)

def encode(lst = [], convert_to_tensor=True, batch_size=128):
    vectors = []
    with tqdm(total=len(lst), desc="Encoding questions") as pbar:
        for i in range(0, len(lst), batch_size):
            batch = lst[i:i + batch_size]
            encoded_batch = model.encode(batch, convert_to_tensor=True)
            if device == 'cuda':
                encoded_batch = encoded_batch.cpu()
            vectors.extend([np.array(arr) for arr in encoded_batch.numpy()])
            pbar.update(len(batch))
    return vectors

df['question_vector'] = encode(lst=list(df['question']))

output_df = df[['question', 'qid', 'question_vector']]

output_df.to_json('encoded_public_test.json')

Encoding questions:   0%|          | 0/10000 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:   1%|▏         | 128/10000 [00:00<00:15, 646.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:   3%|▎         | 256/10000 [00:00<00:12, 757.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:   4%|▍         | 384/10000 [00:00<00:11, 818.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:   5%|▌         | 512/10000 [00:00<00:11, 844.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:   6%|▋         | 640/10000 [00:00<00:10, 876.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:   8%|▊         | 768/10000 [00:00<00:10, 855.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:   9%|▉         | 896/10000 [00:01<00:10, 855.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  10%|█         | 1024/10000 [00:01<00:10, 849.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  12%|█▏        | 1152/10000 [00:01<00:10, 850.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  13%|█▎        | 1280/10000 [00:01<00:10, 858.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  14%|█▍        | 1408/10000 [00:01<00:09, 879.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  15%|█▌        | 1536/10000 [00:01<00:09, 888.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  17%|█▋        | 1664/10000 [00:01<00:09, 882.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  18%|█▊        | 1792/10000 [00:02<00:09, 868.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  19%|█▉        | 1920/10000 [00:02<00:09, 875.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  20%|██        | 2048/10000 [00:02<00:08, 887.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  22%|██▏       | 2176/10000 [00:02<00:08, 883.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  23%|██▎       | 2304/10000 [00:02<00:08, 876.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  24%|██▍       | 2432/10000 [00:02<00:08, 884.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  26%|██▌       | 2560/10000 [00:02<00:08, 896.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  27%|██▋       | 2688/10000 [00:03<00:08, 905.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  28%|██▊       | 2816/10000 [00:03<00:07, 909.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  29%|██▉       | 2944/10000 [00:03<00:07, 904.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  31%|███       | 3072/10000 [00:03<00:07, 911.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  32%|███▏      | 3200/10000 [00:03<00:07, 917.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  33%|███▎      | 3328/10000 [00:03<00:07, 912.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  35%|███▍      | 3456/10000 [00:03<00:07, 899.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  36%|███▌      | 3584/10000 [00:04<00:07, 906.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  37%|███▋      | 3712/10000 [00:04<00:06, 912.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  38%|███▊      | 3840/10000 [00:04<00:06, 903.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  40%|███▉      | 3968/10000 [00:04<00:06, 913.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  41%|████      | 4096/10000 [00:04<00:06, 912.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  42%|████▏     | 4224/10000 [00:04<00:06, 886.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  44%|████▎     | 4352/10000 [00:04<00:06, 896.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  45%|████▍     | 4480/10000 [00:05<00:06, 892.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  46%|████▌     | 4608/10000 [00:05<00:06, 894.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  47%|████▋     | 4736/10000 [00:05<00:05, 882.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  49%|████▊     | 4864/10000 [00:05<00:05, 881.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  50%|████▉     | 4992/10000 [00:05<00:05, 878.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  51%|█████     | 5120/10000 [00:05<00:05, 880.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  52%|█████▏    | 5248/10000 [00:05<00:05, 893.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  54%|█████▍    | 5376/10000 [00:06<00:05, 902.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  55%|█████▌    | 5504/10000 [00:06<00:05, 888.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  56%|█████▋    | 5632/10000 [00:06<00:04, 889.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  58%|█████▊    | 5760/10000 [00:06<00:04, 897.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  59%|█████▉    | 5888/10000 [00:06<00:04, 893.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  60%|██████    | 6016/10000 [00:06<00:04, 887.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  61%|██████▏   | 6144/10000 [00:06<00:04, 899.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  63%|██████▎   | 6272/10000 [00:07<00:04, 906.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  64%|██████▍   | 6400/10000 [00:07<00:04, 892.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  65%|██████▌   | 6528/10000 [00:07<00:03, 889.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  67%|██████▋   | 6656/10000 [00:07<00:03, 900.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  68%|██████▊   | 6784/10000 [00:07<00:03, 885.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  69%|██████▉   | 6912/10000 [00:07<00:03, 869.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  70%|███████   | 7040/10000 [00:07<00:03, 869.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  72%|███████▏  | 7168/10000 [00:08<00:03, 863.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  73%|███████▎  | 7296/10000 [00:08<00:03, 864.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  74%|███████▍  | 7424/10000 [00:08<00:02, 869.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  76%|███████▌  | 7552/10000 [00:08<00:02, 870.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  77%|███████▋  | 7680/10000 [00:08<00:02, 881.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  78%|███████▊  | 7808/10000 [00:08<00:02, 881.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  79%|███████▉  | 7936/10000 [00:08<00:02, 878.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  81%|████████  | 8064/10000 [00:09<00:02, 867.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  82%|████████▏ | 8192/10000 [00:09<00:02, 876.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  83%|████████▎ | 8320/10000 [00:09<00:02, 782.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  84%|████████▍ | 8448/10000 [00:09<00:01, 800.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  86%|████████▌ | 8576/10000 [00:09<00:01, 836.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  87%|████████▋ | 8704/10000 [00:09<00:01, 862.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  88%|████████▊ | 8832/10000 [00:10<00:01, 868.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  90%|████████▉ | 8960/10000 [00:10<00:01, 882.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  91%|█████████ | 9088/10000 [00:10<00:01, 875.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  92%|█████████▏| 9216/10000 [00:10<00:00, 881.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  93%|█████████▎| 9344/10000 [00:10<00:00, 870.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  95%|█████████▍| 9472/10000 [00:10<00:00, 875.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  96%|█████████▌| 9600/10000 [00:10<00:00, 875.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  97%|█████████▋| 9728/10000 [00:11<00:00, 877.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions:  99%|█████████▊| 9856/10000 [00:11<00:00, 890.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding questions: 100%|█████████▉| 9984/10000 [00:11<00:00, 906.29it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Encoding questions: 100%|██████████| 10000/10000 [00:11<00:00, 877.61it/s]


In [4]:
import pandas as pd
import numpy as np
import torch
!pip install sentence_transformers
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# Encode train.csv
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

df = pd.read_csv('/kaggle/input/bkai-ai-track2-legal-document-retrieval/Legal Document Retrieval/train.csv')

model = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')
model = model.to(device)

def encode(lst = [], convert_to_tensor=True, batch_size=1024):
    vectors = []
    with tqdm(total=len(lst), desc="Encoding texts") as pbar:
        # Process in batches
        for i in range(0, len(lst), batch_size):
            batch = lst[i:i + batch_size]
            encoded_batch = model.encode(batch, convert_to_tensor=True, device=device)
            # Move to CPU before converting to numpy
            if torch.cuda.is_available():
                encoded_batch = encoded_batch.cpu()
            vectors.extend([np.array(arr) for arr in encoded_batch.numpy()])
            pbar.update(len(batch))
    return vectors

df['question_vector'] = encode(lst=list(df['question']))

df.to_json('encoded_train_full.json')


  pid, fd = os.forkpty()




Encoding texts:   0%|          | 0/119456 [00:00<?, ?it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1024/119456 [00:00<01:51, 1066.36it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 2048/119456 [00:01<01:48, 1085.04it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 3072/119456 [00:02<01:47, 1085.77it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 4096/119456 [00:03<01:46, 1082.50it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 5120/119456 [00:04<01:45, 1081.62it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 6144/119456 [00:05<01:44, 1080.22it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 7168/119456 [00:06<01:43, 1084.59it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 8192/119456 [00:07<01:42, 1083.60it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 9216/119456 [00:08<01:41, 1086.89it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 10240/119456 [00:09<01:42, 1063.96it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 11264/119456 [00:10<01:42, 1056.39it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 12288/119456 [00:11<01:41, 1059.40it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 13312/119456 [00:12<01:39, 1067.56it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 14336/119456 [00:13<01:38, 1070.94it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 15360/119456 [00:14<01:36, 1074.96it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  14%|█▎        | 16384/119456 [00:15<01:35, 1083.45it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 17408/119456 [00:16<01:33, 1087.00it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 18432/119456 [00:17<01:33, 1077.13it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  16%|█▋        | 19456/119456 [00:18<01:34, 1062.23it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 20480/119456 [00:19<01:32, 1067.57it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 21504/119456 [00:20<01:31, 1074.72it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 22528/119456 [00:20<01:30, 1076.15it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 23552/119456 [00:21<01:28, 1083.44it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 24576/119456 [00:22<01:27, 1089.62it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  21%|██▏       | 25600/119456 [00:23<01:26, 1081.70it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 26624/119456 [00:24<01:26, 1070.01it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 27648/119456 [00:25<01:25, 1067.67it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 28672/119456 [00:26<01:24, 1071.92it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 29696/119456 [00:27<01:23, 1073.69it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 30720/119456 [00:28<01:23, 1068.36it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 31744/119456 [00:29<01:22, 1060.24it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 32768/119456 [00:30<01:21, 1067.16it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 33792/119456 [00:31<01:19, 1072.66it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 34816/119456 [00:32<01:18, 1079.20it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 35840/119456 [00:33<01:17, 1080.54it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 36864/119456 [00:34<01:16, 1080.49it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 37888/119456 [00:35<01:15, 1077.70it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 38912/119456 [00:36<01:14, 1082.62it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 39936/119456 [00:37<01:13, 1087.54it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 40960/119456 [00:38<01:12, 1086.30it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 41984/119456 [00:39<01:11, 1084.49it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 43008/119456 [00:39<01:10, 1085.31it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 44032/119456 [00:40<01:09, 1081.67it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 45056/119456 [00:41<01:08, 1081.65it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  39%|███▊      | 46080/119456 [00:42<01:07, 1086.04it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 47104/119456 [00:43<01:06, 1082.34it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 48128/119456 [00:44<01:06, 1073.97it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 49152/119456 [00:45<01:05, 1077.20it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 50176/119456 [00:46<01:04, 1078.81it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 51200/119456 [00:47<01:03, 1075.18it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  44%|████▎     | 52224/119456 [00:48<01:03, 1064.91it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 53248/119456 [00:49<01:01, 1070.01it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 54272/119456 [00:50<01:00, 1069.80it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  46%|████▋     | 55296/119456 [00:51<01:00, 1068.19it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 56320/119456 [00:52<00:59, 1066.48it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 57344/119456 [00:53<00:58, 1059.38it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 58368/119456 [00:54<00:57, 1058.92it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 59392/119456 [00:55<00:56, 1067.10it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 60416/119456 [00:56<00:55, 1072.26it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  51%|█████▏    | 61440/119456 [00:57<00:54, 1072.27it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 62464/119456 [00:58<00:53, 1074.33it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 63488/119456 [00:59<00:52, 1075.30it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 64512/119456 [01:00<00:51, 1068.94it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 65536/119456 [01:00<00:50, 1069.21it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 66560/119456 [01:01<00:49, 1069.46it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 67584/119456 [01:02<00:48, 1066.14it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 68608/119456 [01:03<00:47, 1064.70it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 69632/119456 [01:04<00:47, 1053.53it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 70656/119456 [01:05<00:46, 1054.77it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 71680/119456 [01:06<00:45, 1058.56it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 72704/119456 [01:07<00:44, 1058.81it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 73728/119456 [01:08<00:42, 1063.48it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 74752/119456 [01:09<00:42, 1063.69it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 75776/119456 [01:10<00:40, 1071.30it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 76800/119456 [01:11<00:39, 1069.59it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 77824/119456 [01:12<00:38, 1070.53it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 78848/119456 [01:13<00:37, 1071.35it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 79872/119456 [01:14<00:36, 1075.48it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 80896/119456 [01:15<00:36, 1061.79it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  69%|██████▊   | 81920/119456 [01:16<00:35, 1070.01it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 82944/119456 [01:17<00:34, 1063.50it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 83968/119456 [01:18<00:33, 1062.59it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 84992/119456 [01:19<00:32, 1063.60it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 86016/119456 [01:20<00:31, 1069.03it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 87040/119456 [01:21<00:30, 1065.68it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  74%|███████▎  | 88064/119456 [01:22<00:29, 1066.87it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 89088/119456 [01:23<00:28, 1070.29it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 90112/119456 [01:24<00:27, 1070.97it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  76%|███████▋  | 91136/119456 [01:25<00:26, 1068.62it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 92160/119456 [01:25<00:25, 1072.91it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 93184/119456 [01:26<00:24, 1079.45it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 94208/119456 [01:27<00:23, 1080.10it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 95232/119456 [01:28<00:22, 1080.52it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 96256/119456 [01:29<00:21, 1085.14it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  81%|████████▏ | 97280/119456 [01:30<00:20, 1078.43it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 98304/119456 [01:31<00:19, 1066.58it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 99328/119456 [01:32<00:18, 1069.64it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 100352/119456 [01:33<00:17, 1069.33it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 101376/119456 [01:34<00:17, 1062.24it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 102400/119456 [01:35<00:15, 1076.40it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 103424/119456 [01:36<00:14, 1079.30it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 104448/119456 [01:37<00:14, 1071.16it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 105472/119456 [01:38<00:13, 1069.52it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 106496/119456 [01:39<00:12, 1066.41it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 107520/119456 [01:40<00:11, 1073.94it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 108544/119456 [01:41<00:10, 1082.56it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 109568/119456 [01:42<00:09, 1087.60it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 110592/119456 [01:43<00:08, 1084.32it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 111616/119456 [01:44<00:07, 1076.31it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 112640/119456 [01:44<00:06, 1073.93it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 113664/119456 [01:45<00:05, 1075.24it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 114688/119456 [01:46<00:04, 1070.35it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 115712/119456 [01:47<00:03, 1068.12it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 116736/119456 [01:48<00:02, 1065.94it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▊| 117760/119456 [01:49<00:01, 1074.29it/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 118784/119456 [01:50<00:00, 1071.94it/s]

Batches:   0%|          | 0/21 [00:00<?, ?it/s]

Encoding texts: 100%|██████████| 119456/119456 [01:51<00:00, 1072.47it/s]


# Create a dissimilarity list and negative pairing

In [1]:
import torch
import torch.nn.functional as F
import pandas as pd
import zipfile
import json

# Input: corpus and training dataset
# Output: dissimilarity list

train_df = pd.read_json('/kaggle/input/encodedtrainfull/encoded_train_full.json')
corpus_df = pd.read_json('/kaggle/input/encoded/encoded_corpus.json')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

train_ids = train_df['qid'].tolist()
train_vectors = torch.tensor(train_df['question_vector'].tolist(), dtype=torch.float32).to(device)
corpus_ids = corpus_df['cid'].tolist()
corpus_vectors = torch.tensor(corpus_df['vector'].tolist(), dtype=torch.float32).to(device)

train_vectors = F.normalize(train_vectors, p=2, dim=1)
corpus_vectors = F.normalize(corpus_vectors, p=2, dim=1)

json_results = []
batch_size = 256

with open('predict_most_dissimilar.txt', 'w') as f:
    for i in range(0, len(train_ids), batch_size): 
        # Process in batches
        batch_train_vectors = train_vectors[i:i+batch_size]
        batch_train_ids = train_ids[i:i+batch_size]
        
        similarity_matrix = torch.matmul(batch_train_vectors, corpus_vectors.T)
        
        min_values, min_indices = torch.min(similarity_matrix, dim=1)
        
        for j, (idx, score) in enumerate(zip(min_indices, min_values)):
            qid = batch_train_ids[j]
            cid = str(corpus_ids[idx.item()])
            
            f.write(f"{qid} {cid}\n")

            json_entry = {
                "query_id": qid,
                "candidates": {
                    "doc_ids": [cid],
                    "scores": [score.item()],
                }
            }
            json_results.append(json_entry)

with open('predict_most_dissimilar.json', 'w', encoding='utf-8') as f:
    json.dump(json_results, f, ensure_ascii=False, indent=2)

with zipfile.ZipFile('predict_most_dissimilar.zip', 'w') as zipf:
    zipf.write('predict_most_dissimilar.txt')
    zipf.write('predict_most_dissimilar.json')


Using device: cuda


In [2]:
import os
import subprocess
from IPython.display import FileLink, display

# Download the dissimilarity list to local

def download_file(path, download_file_name):
    os.chdir('/kaggle/working/')
    zip_name = f"/kaggle/working/{download_file_name}.zip"
    command = f"zip {zip_name} {path} -r"
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f'{download_file_name}.zip'))

# download_file('/kaggle/working', 'encoded_public_test_finetuned')
download_file('/kaggle/working', 'predict_dissimilar')

In [3]:
import pandas as pd
import json
import numpy as np

#Input: 
''' 
        DataFrames: df_train, df_corpus - csv input
        json: predict_most_dissimilar.json - 
    '''
#Output
''' 
        json: fine_tune_training.json -> will be used to fine-tune the model
    '''



df_train = pd.read_csv('/kaggle/input/bkai-ai-track2-legal-document-retrieval/Legal Document Retrieval/train.csv', on_bad_lines='skip')
df_corpus = pd.read_csv('/kaggle/input/preprocessed-corpus/preprocessed_corpus.csv')

with open('/kaggle/input/predict-dissimilar/kaggle/working/predict_most_dissimilar.json', 'r') as f:
    predictions = json.load(f)

training_data = []

for pred in predictions:
    qid = pred['query_id']
    neg_cid = pred['candidates']['doc_ids'][0]  #Negative cid
    
    # Query text from train.csv
    query_row = df_train[df_train['qid'] == qid]
    if query_row.empty:
        continue
    query_text = query_row['question'].iloc[0]
    
    # Positive -> ground truth - given in train.csv
    ground_truth_str = query_row['cid'].iloc[0]
    ground_truth_cids = ground_truth_str.strip('[]').split()
    pos_cid = ground_truth_cids[0]  
    
    pos_matches = df_corpus.loc[df_corpus['cid'] == int(pos_cid), 'text']
    neg_matches = df_corpus.loc[df_corpus['cid'] == int(neg_cid), 'text']
    
    if pos_matches.empty or neg_matches.empty:
        continue  
    
    pos_text = pos_matches.iloc[0]
    neg_text = neg_matches.iloc[0]
    
    entry = {
        "query": query_text,
        "pos": [pos_text],
        "neg": [neg_text]
    }
    training_data.append(entry)

with open('fine_tune_training.json', 'w', encoding='utf-8') as f:
    for entry in training_data:
        f.write(json.dumps(entry, ensure_ascii=False) + '\n')



# Fine-tune bkai-foundation-models/vietnamese-bi-encoder
- Use the given fine_tune_training.json to fine-tune the model
- This part was originally done on a high-performance server with a RTX 4090, but for demonstrative purpose, we will utilize Kaggle for now.
- Since the waiting time would be rigorous, in this code we only run with 1 epoch, while the orignal version ran with 3.

In [1]:
from datasets import Dataset, load_dataset
import os
! pip install -U accelerate
! pip install -U transformers sentence_transformers
os.environ["WANDB_DISABLED"] = "true"
import json
from sentence_transformers import (
    SentenceTransformer,
    losses,
    InputExample
)
from torch.utils.data import DataLoader


'''
    #Input:
    Model: bkai-foundation-models/vietnamese-bi-encoder
    Negative pairing dataset: fine_tune_training.json

    #Output:
    Fine-tuned model.
'''

model = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')

def load_json_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = [json.loads(line) for line in f]
    
    train_examples = []
    for item in data:
        query = item['query']
        pos = item['pos'][0] 
        
        # Positive
        train_examples.append(InputExample(
            texts=[query, pos],
            label=1.0  
        ))
        
        # Negative
        for neg in item['neg']:
            train_examples.append(InputExample(
                texts=[query, neg],
                label=0.0 
            ))
    
    return train_examples

train_examples = load_json_data('/kaggle/input/negativepairing/fine_tune_training.json')

train_dataloader = DataLoader(
    train_examples,
    shuffle=True,
    batch_size=48
)

train_loss = losses.MultipleNegativesRankingLoss(model)

warmup_steps = int(len(train_dataloader) * 0.1)  

model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=1, #Kaggle: 1 epoch - HPC: 3 epochs
    warmup_steps=warmup_steps,
    optimizer_params={'lr': 2e-5},
    output_path='/kaggle/working/finetuned',
    save_best_model=True,
    show_progress_bar=True,
    checkpoint_path='/kaggle/working/checkpoints',
    checkpoint_save_steps=7500,  
    checkpoint_save_total_limit=1 
)

model.save('/kaggle/working/final') 

Collecting accelerate
  Downloading accelerate-1.2.1-py3-none-any.whl.metadata (19 kB)
Downloading accelerate-1.2.1-py3-none-any.whl (336 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m336.4/336.4 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hInstalling collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.34.2
    Uninstalling accelerate-0.34.2:
      Successfully uninstalled accelerate-0.34.2
Successfully installed accelerate-1.2.1
Collecting transformers
  Downloading transformers-4.48.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence_transformers
  Downloading sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/6.46k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/540M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/895k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.14M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/22.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/167 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Step,Training Loss
500,3.0845
1000,1.9874
1500,1.6836
2000,1.501
2500,1.3703
3000,1.2644
3500,1.2063
4000,1.1326
4500,1.1221


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

In [5]:
import os
import subprocess
from IPython.display import FileLink, display

#Save finetuned model(1 epoch version)
def download_file(path, download_file_name):
    os.chdir('/kaggle/working/')
    zip_name = f"/kaggle/working/{download_file_name}.zip"
    command = f"zip {zip_name} {path} -r"
    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f'{download_file_name}.zip'))

# download_file('/kaggle/working', 'encoded_public_test_finetuned')
download_file('encoded_corpus_finetuned.json', 'encoded_corpus_finetuned')

# Encode with fine-tuned model
- preprocessed_corpus.csv to encoded_corpus_finetuned.json
- public_test.csv to encoded_public_test_finetuned.json

In [4]:
import pandas as pd
import numpy as np
import torch
!pip install sentence_transformers
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

'''
    Input:
        Fine-tuned model: finetuned-biencoder
        preprocessed_corpus.csv
    Output:
        encoded_corpus_finetuned.json
'''

# Encode corpus.csv
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

df = pd.read_csv('/kaggle/input/preprocessed-corpus/preprocessed_corpus.csv')

#Not modified yet
model = SentenceTransformer('/kaggle/input/finetuned-biencoder/final')
model = model.to(device)

def encode(lst = [], convert_to_tensor=True, batch_size=128):
    vectors = []
    with tqdm(total = len(lst), desc="Encoding texts") as pbar:
        # Process in batches
        for i in range(0, len(lst), batch_size):
            batch = lst[i:i + batch_size]
            encoded_batch = model.encode(batch, convert_to_tensor=True, device=device)
            # Move to CPU before converting to numpy
            if torch.cuda.is_available():
                encoded_batch = encoded_batch.cpu()
            vectors.extend([np.array(arr) for arr in encoded_batch.numpy()])
            pbar.update(len(batch))
    return vectors

df['vector'] = encode(lst=list(df['text']))

df.to_json('encoded_corpus_finetuned.json')


  pid, fd = os.forkpty()




Encoding texts:   0%|          | 0/261597 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 128/261597 [00:00<29:17, 148.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 256/261597 [00:01<29:54, 145.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 384/261597 [00:02<30:04, 144.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 512/261597 [00:03<30:12, 144.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 640/261597 [00:04<30:10, 144.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 768/261597 [00:05<31:11, 139.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 896/261597 [00:06<31:23, 138.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 1024/261597 [00:07<31:43, 136.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 1152/261597 [00:08<32:19, 134.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   0%|          | 1280/261597 [00:09<31:31, 137.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1408/261597 [00:10<30:53, 140.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1536/261597 [00:11<31:42, 136.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1664/261597 [00:11<30:52, 140.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1792/261597 [00:12<30:16, 142.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 1920/261597 [00:13<29:37, 146.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2048/261597 [00:14<30:37, 141.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2176/261597 [00:15<30:02, 143.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2304/261597 [00:16<30:01, 143.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2432/261597 [00:17<30:38, 140.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2560/261597 [00:18<30:21, 142.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2688/261597 [00:18<29:50, 144.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2816/261597 [00:19<29:24, 146.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 2944/261597 [00:20<28:44, 149.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 3072/261597 [00:21<29:07, 147.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|          | 3200/261597 [00:22<29:40, 145.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3328/261597 [00:23<29:58, 143.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3456/261597 [00:24<30:23, 141.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3584/261597 [00:25<30:09, 142.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3712/261597 [00:26<29:51, 143.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   1%|▏         | 3840/261597 [00:26<30:11, 142.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 3968/261597 [00:27<30:21, 141.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4096/261597 [00:28<30:33, 140.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4224/261597 [00:29<30:57, 138.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4352/261597 [00:30<31:42, 135.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4480/261597 [00:31<30:06, 142.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4608/261597 [00:32<28:53, 148.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4736/261597 [00:33<28:28, 150.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4864/261597 [00:33<28:17, 151.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 4992/261597 [00:34<28:39, 149.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5120/261597 [00:35<29:06, 146.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5248/261597 [00:36<29:34, 144.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5376/261597 [00:37<29:32, 144.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5504/261597 [00:38<30:45, 138.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5632/261597 [00:39<30:37, 139.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5760/261597 [00:40<29:58, 142.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 5888/261597 [00:41<28:45, 148.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6016/261597 [00:42<29:19, 145.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6144/261597 [00:42<29:31, 144.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6272/261597 [00:43<30:29, 139.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6400/261597 [00:44<31:02, 137.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   2%|▏         | 6528/261597 [00:45<31:33, 134.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 6656/261597 [00:46<30:27, 139.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 6784/261597 [00:47<29:31, 143.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 6912/261597 [00:48<30:48, 137.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7040/261597 [00:49<30:40, 138.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7168/261597 [00:50<30:16, 140.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7296/261597 [00:51<30:18, 139.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7424/261597 [00:52<30:52, 137.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7552/261597 [00:53<29:49, 141.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7680/261597 [00:54<29:40, 142.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7808/261597 [00:55<30:39, 137.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 7936/261597 [00:55<30:55, 136.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8064/261597 [00:56<30:16, 139.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8192/261597 [00:57<30:00, 140.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8320/261597 [00:58<29:45, 141.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8448/261597 [00:59<30:03, 140.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8576/261597 [01:00<31:48, 132.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8704/261597 [01:01<30:56, 136.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8832/261597 [01:02<29:38, 142.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 8960/261597 [01:03<29:56, 140.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   3%|▎         | 9088/261597 [01:04<30:45, 136.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9216/261597 [01:05<31:00, 135.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9344/261597 [01:06<29:47, 141.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9472/261597 [01:06<30:12, 139.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9600/261597 [01:07<30:26, 137.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▎         | 9728/261597 [01:09<33:10, 126.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 9856/261597 [01:09<31:24, 133.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 9984/261597 [01:10<30:54, 135.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10112/261597 [01:11<30:01, 139.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10240/261597 [01:12<29:41, 141.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10368/261597 [01:13<29:20, 142.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10496/261597 [01:14<29:39, 141.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10624/261597 [01:15<29:51, 140.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10752/261597 [01:16<30:07, 138.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 10880/261597 [01:17<29:43, 140.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11008/261597 [01:18<29:51, 139.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11136/261597 [01:19<31:07, 134.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11264/261597 [01:20<31:26, 132.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11392/261597 [01:20<30:13, 137.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11520/261597 [01:22<31:08, 133.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   4%|▍         | 11648/261597 [01:22<30:10, 138.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 11776/261597 [01:23<29:26, 141.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 11904/261597 [01:24<29:52, 139.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12032/261597 [01:25<28:55, 143.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12160/261597 [01:26<29:29, 141.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12288/261597 [01:27<30:26, 136.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12416/261597 [01:28<30:13, 137.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12544/261597 [01:29<29:08, 142.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12672/261597 [01:30<29:14, 141.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12800/261597 [01:31<30:17, 136.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 12928/261597 [01:32<29:52, 138.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▍         | 13056/261597 [01:32<30:29, 135.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13184/261597 [01:33<31:00, 133.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13312/261597 [01:35<31:33, 131.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13440/261597 [01:35<31:16, 132.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13568/261597 [01:37<32:43, 126.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13696/261597 [01:38<32:25, 127.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13824/261597 [01:39<33:13, 124.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 13952/261597 [01:40<32:58, 125.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 14080/261597 [01:40<30:49, 133.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 14208/261597 [01:41<31:26, 131.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   5%|▌         | 14336/261597 [01:42<30:42, 134.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14464/261597 [01:43<30:07, 136.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14592/261597 [01:44<31:01, 132.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14720/261597 [01:45<31:07, 132.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14848/261597 [01:46<31:25, 130.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 14976/261597 [01:47<32:03, 128.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15104/261597 [01:48<31:25, 130.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15232/261597 [01:49<33:49, 121.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15360/261597 [01:50<31:52, 128.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15488/261597 [01:51<32:01, 128.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15616/261597 [01:52<30:18, 135.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15744/261597 [01:53<29:57, 136.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 15872/261597 [01:54<32:56, 124.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 16000/261597 [01:55<31:37, 129.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 16128/261597 [01:56<30:57, 132.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▌         | 16256/261597 [01:57<30:20, 134.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16384/261597 [01:58<31:39, 129.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16512/261597 [01:59<32:20, 126.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16640/261597 [02:00<32:16, 126.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16768/261597 [02:01<32:29, 125.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   6%|▋         | 16896/261597 [02:02<33:36, 121.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17024/261597 [02:03<32:50, 124.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17152/261597 [02:04<32:15, 126.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17280/261597 [02:05<31:29, 129.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17408/261597 [02:06<30:34, 133.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17536/261597 [02:07<29:59, 135.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17664/261597 [02:08<29:01, 140.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17792/261597 [02:09<29:37, 137.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 17920/261597 [02:10<30:07, 134.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18048/261597 [02:11<30:20, 133.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18176/261597 [02:12<30:41, 132.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18304/261597 [02:13<28:57, 140.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18432/261597 [02:14<29:13, 138.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18560/261597 [02:15<29:32, 137.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18688/261597 [02:16<30:08, 134.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18816/261597 [02:16<29:39, 136.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 18944/261597 [02:17<29:05, 139.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19072/261597 [02:18<30:04, 134.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19200/261597 [02:19<29:35, 136.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19328/261597 [02:20<28:49, 140.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19456/261597 [02:21<28:36, 141.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   7%|▋         | 19584/261597 [02:22<29:40, 135.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 19712/261597 [02:23<30:49, 130.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 19840/261597 [02:24<31:19, 128.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 19968/261597 [02:25<31:33, 127.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20096/261597 [02:26<31:37, 127.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20224/261597 [02:27<32:06, 125.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20352/261597 [02:28<31:27, 127.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20480/261597 [02:29<31:41, 126.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20608/261597 [02:30<31:13, 128.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20736/261597 [02:31<29:34, 135.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20864/261597 [02:32<27:56, 143.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 20992/261597 [02:33<28:31, 140.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21120/261597 [02:34<29:21, 136.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21248/261597 [02:35<30:04, 133.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21376/261597 [02:36<29:55, 133.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21504/261597 [02:37<31:39, 126.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21632/261597 [02:38<31:33, 126.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21760/261597 [02:39<31:05, 128.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 21888/261597 [02:40<30:44, 129.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 22016/261597 [02:41<31:09, 128.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   8%|▊         | 22144/261597 [02:42<30:25, 131.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22272/261597 [02:43<30:12, 132.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22400/261597 [02:44<29:33, 134.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22528/261597 [02:45<29:45, 133.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22656/261597 [02:46<30:01, 132.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▊         | 22784/261597 [02:47<32:16, 123.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 22912/261597 [02:48<31:24, 126.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23040/261597 [02:49<30:10, 131.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23168/261597 [02:49<29:13, 135.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23296/261597 [02:50<29:12, 135.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23424/261597 [02:51<29:51, 132.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23552/261597 [02:52<29:55, 132.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23680/261597 [02:53<29:11, 135.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23808/261597 [02:54<29:18, 135.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 23936/261597 [02:55<29:08, 135.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24064/261597 [02:56<28:21, 139.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24192/261597 [02:57<28:52, 137.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24320/261597 [02:58<29:29, 134.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24448/261597 [02:59<28:24, 139.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24576/261597 [03:00<27:04, 145.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24704/261597 [03:00<27:10, 145.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:   9%|▉         | 24832/261597 [03:01<27:20, 144.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 24960/261597 [03:02<27:13, 144.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25088/261597 [03:03<27:18, 144.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25216/261597 [03:04<27:53, 141.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25344/261597 [03:06<32:43, 120.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25472/261597 [03:06<30:32, 128.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25600/261597 [03:07<30:20, 129.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25728/261597 [03:08<30:00, 130.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25856/261597 [03:09<29:51, 131.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 25984/261597 [03:10<28:47, 136.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|▉         | 26112/261597 [03:11<27:45, 141.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26240/261597 [03:12<28:12, 139.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26368/261597 [03:13<28:44, 136.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26496/261597 [03:14<29:37, 132.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26624/261597 [03:15<28:30, 137.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26752/261597 [03:16<27:50, 140.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 26880/261597 [03:17<28:30, 137.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 27008/261597 [03:18<28:21, 137.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 27136/261597 [03:18<28:21, 137.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 27264/261597 [03:19<28:53, 135.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  10%|█         | 27392/261597 [03:20<28:28, 137.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 27520/261597 [03:21<27:26, 142.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 27648/261597 [03:22<27:24, 142.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 27776/261597 [03:23<27:13, 143.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 27904/261597 [03:24<27:43, 140.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28032/261597 [03:25<27:47, 140.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28160/261597 [03:26<27:01, 143.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28288/261597 [03:26<26:00, 149.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28416/261597 [03:27<26:22, 147.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28544/261597 [03:28<27:04, 143.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28672/261597 [03:29<26:59, 143.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28800/261597 [03:30<26:40, 145.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 28928/261597 [03:31<26:06, 148.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 29056/261597 [03:32<26:35, 145.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 29184/261597 [03:33<26:18, 147.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█         | 29312/261597 [03:33<26:36, 145.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29440/261597 [03:34<26:29, 146.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29568/261597 [03:35<26:54, 143.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29696/261597 [03:36<27:39, 139.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29824/261597 [03:37<26:08, 147.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 29952/261597 [03:38<25:02, 154.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  11%|█▏        | 30080/261597 [03:39<25:52, 149.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30208/261597 [03:40<25:51, 149.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30336/261597 [03:40<26:33, 145.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30464/261597 [03:41<26:55, 143.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30592/261597 [03:42<26:47, 143.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30720/261597 [03:43<27:27, 140.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30848/261597 [03:44<28:06, 136.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 30976/261597 [03:45<27:00, 142.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31104/261597 [03:46<27:05, 141.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31232/261597 [03:47<28:30, 134.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31360/261597 [03:48<29:42, 129.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31488/261597 [03:49<29:45, 128.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31616/261597 [03:50<27:39, 138.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31744/261597 [03:51<27:04, 141.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 31872/261597 [03:52<28:47, 132.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32000/261597 [03:53<28:29, 134.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32128/261597 [03:54<28:49, 132.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32256/261597 [03:55<27:47, 137.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32384/261597 [03:56<27:51, 137.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32512/261597 [03:56<27:51, 137.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  12%|█▏        | 32640/261597 [03:57<27:28, 138.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 32768/261597 [03:58<28:04, 135.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 32896/261597 [03:59<28:18, 134.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33024/261597 [04:00<28:14, 134.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33152/261597 [04:01<28:34, 133.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33280/261597 [04:02<27:05, 140.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33408/261597 [04:03<26:37, 142.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33536/261597 [04:04<27:25, 138.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33664/261597 [04:05<27:22, 138.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33792/261597 [04:06<25:55, 146.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 33920/261597 [04:07<26:33, 142.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 34048/261597 [04:08<27:48, 136.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 34176/261597 [04:09<28:17, 133.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 34304/261597 [04:09<27:29, 137.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 34432/261597 [04:11<29:04, 130.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 34560/261597 [04:12<29:09, 129.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 34688/261597 [04:12<28:57, 130.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 34816/261597 [04:13<28:21, 133.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 34944/261597 [04:14<28:57, 130.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 35072/261597 [04:15<27:11, 138.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  13%|█▎        | 35200/261597 [04:16<26:34, 141.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▎        | 35328/261597 [04:17<27:56, 134.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▎        | 35456/261597 [04:18<28:02, 134.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▎        | 35584/261597 [04:19<27:32, 136.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▎        | 35712/261597 [04:20<28:44, 130.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▎        | 35840/261597 [04:21<28:26, 132.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▎        | 35968/261597 [04:22<28:42, 131.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 36096/261597 [04:23<29:32, 127.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 36224/261597 [04:24<28:57, 129.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 36352/261597 [04:25<28:45, 130.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 36480/261597 [04:26<27:56, 134.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 36608/261597 [04:27<26:45, 140.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 36736/261597 [04:28<26:02, 143.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 36864/261597 [04:28<25:15, 148.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 36992/261597 [04:29<25:53, 144.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 37120/261597 [04:30<26:58, 138.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 37248/261597 [04:31<26:40, 140.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 37376/261597 [04:32<27:15, 137.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 37504/261597 [04:33<26:38, 140.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 37632/261597 [04:34<26:14, 142.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 37760/261597 [04:35<24:25, 152.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  14%|█▍        | 37888/261597 [04:35<23:50, 156.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 38016/261597 [04:36<23:42, 157.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 38144/261597 [04:37<24:42, 150.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 38272/261597 [04:38<25:56, 143.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 38400/261597 [04:39<28:07, 132.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 38528/261597 [04:40<27:31, 135.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 38656/261597 [04:41<27:23, 135.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 38784/261597 [04:42<27:00, 137.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 38912/261597 [04:43<27:51, 133.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 39040/261597 [04:44<27:13, 136.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▍        | 39168/261597 [04:45<26:14, 141.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 39296/261597 [04:46<26:45, 138.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 39424/261597 [04:47<26:05, 141.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 39552/261597 [04:47<25:48, 143.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 39680/261597 [04:48<26:26, 139.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 39808/261597 [04:49<25:45, 143.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 39936/261597 [04:50<24:48, 148.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 40064/261597 [04:51<24:51, 148.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 40192/261597 [04:52<25:24, 145.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 40320/261597 [04:53<24:41, 149.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  15%|█▌        | 40448/261597 [04:53<25:08, 146.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 40576/261597 [04:55<26:50, 137.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 40704/261597 [04:55<26:06, 141.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 40832/261597 [04:56<26:09, 140.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 40960/261597 [04:57<26:21, 139.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 41088/261597 [04:58<26:56, 136.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 41216/261597 [04:59<26:45, 137.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 41344/261597 [05:00<25:24, 144.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 41472/261597 [05:01<26:08, 140.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 41600/261597 [05:02<26:32, 138.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 41728/261597 [05:03<26:31, 138.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 41856/261597 [05:04<25:00, 146.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 41984/261597 [05:04<25:17, 144.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 42112/261597 [05:05<25:33, 143.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 42240/261597 [05:06<25:23, 144.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 42368/261597 [05:07<25:02, 145.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▌        | 42496/261597 [05:08<25:36, 142.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▋        | 42624/261597 [05:09<24:46, 147.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▋        | 42752/261597 [05:10<24:18, 150.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▋        | 42880/261597 [05:11<25:35, 142.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▋        | 43008/261597 [05:11<24:36, 148.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  16%|█▋        | 43136/261597 [05:12<24:57, 145.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 43264/261597 [05:13<25:23, 143.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 43392/261597 [05:14<25:05, 144.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 43520/261597 [05:15<25:11, 144.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 43648/261597 [05:16<24:31, 148.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 43776/261597 [05:17<24:41, 147.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 43904/261597 [05:18<26:06, 139.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 44032/261597 [05:19<26:24, 137.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 44160/261597 [05:20<26:36, 136.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 44288/261597 [05:21<26:43, 135.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 44416/261597 [05:22<26:14, 137.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 44544/261597 [05:22<25:43, 140.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 44672/261597 [05:23<25:37, 141.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 44800/261597 [05:24<26:06, 138.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 44928/261597 [05:25<25:40, 140.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 45056/261597 [05:26<25:22, 142.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 45184/261597 [05:27<25:36, 140.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 45312/261597 [05:28<25:13, 142.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 45440/261597 [05:29<25:57, 138.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 45568/261597 [05:30<26:51, 134.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  17%|█▋        | 45696/261597 [05:31<27:04, 132.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 45824/261597 [05:32<26:49, 134.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 45952/261597 [05:33<25:44, 139.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 46080/261597 [05:33<25:04, 143.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 46208/261597 [05:34<26:20, 136.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 46336/261597 [05:36<27:10, 132.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 46464/261597 [05:36<26:32, 135.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 46592/261597 [05:37<25:29, 140.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 46720/261597 [05:38<26:38, 134.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 46848/261597 [05:39<26:37, 134.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 46976/261597 [05:40<26:40, 134.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 47104/261597 [05:41<26:16, 136.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 47232/261597 [05:42<26:29, 134.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 47360/261597 [05:43<25:47, 138.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 47488/261597 [05:44<26:31, 134.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 47616/261597 [05:45<26:40, 133.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 47744/261597 [05:46<26:59, 132.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 47872/261597 [05:47<26:39, 133.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 48000/261597 [05:48<26:20, 135.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 48128/261597 [05:49<26:57, 131.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 48256/261597 [05:50<27:02, 131.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  18%|█▊        | 48384/261597 [05:51<26:44, 132.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▊        | 48512/261597 [05:52<26:29, 134.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▊        | 48640/261597 [05:53<25:48, 137.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▊        | 48768/261597 [05:53<24:56, 142.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▊        | 48896/261597 [05:54<24:59, 141.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▊        | 49024/261597 [05:55<24:35, 144.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 49152/261597 [05:56<25:12, 140.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 49280/261597 [05:57<25:54, 136.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 49408/261597 [05:58<25:55, 136.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 49536/261597 [05:59<25:38, 137.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 49664/261597 [06:00<25:53, 136.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 49792/261597 [06:01<25:45, 137.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 49920/261597 [06:02<26:16, 134.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 50048/261597 [06:03<25:46, 136.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 50176/261597 [06:04<26:52, 131.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 50304/261597 [06:05<26:40, 132.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 50432/261597 [06:06<26:30, 132.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 50560/261597 [06:07<26:41, 131.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 50688/261597 [06:08<26:53, 130.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 50816/261597 [06:09<26:15, 133.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  19%|█▉        | 50944/261597 [06:10<26:21, 133.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 51072/261597 [06:10<25:54, 135.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 51200/261597 [06:11<26:02, 134.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 51328/261597 [06:12<26:19, 133.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 51456/261597 [06:13<25:41, 136.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 51584/261597 [06:14<25:51, 135.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 51712/261597 [06:15<25:33, 136.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 51840/261597 [06:16<24:50, 140.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 51968/261597 [06:17<25:04, 139.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 52096/261597 [06:18<24:54, 140.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|█▉        | 52224/261597 [06:19<23:33, 148.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 52352/261597 [06:19<21:30, 162.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 52480/261597 [06:20<19:41, 176.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 52608/261597 [06:20<18:56, 183.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 52736/261597 [06:21<18:41, 186.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 52864/261597 [06:22<20:13, 172.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 52992/261597 [06:23<21:59, 158.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 53120/261597 [06:24<23:25, 148.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 53248/261597 [06:25<24:45, 140.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 53376/261597 [06:26<24:01, 144.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  20%|██        | 53504/261597 [06:27<23:16, 149.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 53632/261597 [06:27<22:52, 151.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 53760/261597 [06:28<23:47, 145.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 53888/261597 [06:29<23:42, 146.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 54016/261597 [06:30<24:06, 143.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 54144/261597 [06:31<23:50, 145.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 54272/261597 [06:32<24:12, 142.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 54400/261597 [06:33<25:28, 135.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 54528/261597 [06:34<26:15, 131.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 54656/261597 [06:35<27:20, 126.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 54784/261597 [06:36<27:29, 125.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 54912/261597 [06:37<27:12, 126.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 55040/261597 [06:38<27:13, 126.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 55168/261597 [06:39<26:38, 129.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 55296/261597 [06:40<26:22, 130.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 55424/261597 [06:41<25:16, 135.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██        | 55552/261597 [06:42<24:34, 139.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██▏       | 55680/261597 [06:43<24:02, 142.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██▏       | 55808/261597 [06:44<24:08, 142.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██▏       | 55936/261597 [06:44<23:50, 143.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██▏       | 56064/261597 [06:45<24:00, 142.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  21%|██▏       | 56192/261597 [06:46<24:03, 142.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 56320/261597 [06:47<23:57, 142.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 56448/261597 [06:48<24:29, 139.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 56576/261597 [06:49<24:08, 141.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 56704/261597 [06:50<24:09, 141.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 56832/261597 [06:51<24:10, 141.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 56960/261597 [06:52<24:07, 141.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 57088/261597 [06:53<24:23, 139.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 57216/261597 [06:54<24:24, 139.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 57344/261597 [06:54<24:18, 140.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 57472/261597 [06:55<24:24, 139.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 57600/261597 [06:56<24:27, 139.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 57728/261597 [06:57<24:22, 139.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 57856/261597 [06:58<24:11, 140.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 57984/261597 [06:59<23:59, 141.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 58112/261597 [07:00<24:17, 139.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 58240/261597 [07:01<23:56, 141.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 58368/261597 [07:02<24:02, 140.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 58496/261597 [07:03<23:41, 142.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 58624/261597 [07:03<23:42, 142.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  22%|██▏       | 58752/261597 [07:04<23:22, 144.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 58880/261597 [07:05<23:15, 145.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 59008/261597 [07:06<23:19, 144.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 59136/261597 [07:07<23:44, 142.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 59264/261597 [07:08<23:55, 140.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 59392/261597 [07:09<23:11, 145.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 59520/261597 [07:10<23:09, 145.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 59648/261597 [07:11<23:23, 143.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 59776/261597 [07:12<23:32, 142.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 59904/261597 [07:12<23:41, 141.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 60032/261597 [07:13<23:44, 141.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 60160/261597 [07:14<23:36, 142.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 60288/261597 [07:15<23:32, 142.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 60416/261597 [07:16<23:21, 143.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 60544/261597 [07:17<23:11, 144.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 60672/261597 [07:18<23:03, 145.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 60800/261597 [07:19<22:59, 145.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 60928/261597 [07:19<22:51, 146.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 61056/261597 [07:20<23:00, 145.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 61184/261597 [07:21<22:38, 147.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 61312/261597 [07:22<22:58, 145.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  23%|██▎       | 61440/261597 [07:23<23:07, 144.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▎       | 61568/261597 [07:24<23:17, 143.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▎       | 61696/261597 [07:25<22:58, 145.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▎       | 61824/261597 [07:26<22:58, 144.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▎       | 61952/261597 [07:27<23:04, 144.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▎       | 62080/261597 [07:27<23:19, 142.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 62208/261597 [07:28<23:09, 143.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 62336/261597 [07:29<22:43, 146.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 62464/261597 [07:30<22:56, 144.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 62592/261597 [07:31<22:36, 146.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 62720/261597 [07:32<22:29, 147.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 62848/261597 [07:33<22:21, 148.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 62976/261597 [07:34<22:19, 148.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 63104/261597 [07:34<21:50, 151.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 63232/261597 [07:35<21:50, 151.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 63360/261597 [07:36<22:15, 148.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 63488/261597 [07:37<22:44, 145.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 63616/261597 [07:38<22:28, 146.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 63744/261597 [07:39<22:19, 147.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 63872/261597 [07:40<22:17, 147.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  24%|██▍       | 64000/261597 [07:40<22:27, 146.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 64128/261597 [07:41<22:42, 144.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 64256/261597 [07:42<22:41, 144.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 64384/261597 [07:43<22:55, 143.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 64512/261597 [07:44<22:59, 142.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 64640/261597 [07:45<22:41, 144.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 64768/261597 [07:46<22:22, 146.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 64896/261597 [07:47<22:31, 145.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 65024/261597 [07:48<22:22, 146.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 65152/261597 [07:48<22:13, 147.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▍       | 65280/261597 [07:49<21:55, 149.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 65408/261597 [07:50<21:42, 150.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 65536/261597 [07:51<21:34, 151.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 65664/261597 [07:52<22:02, 148.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 65792/261597 [07:53<21:58, 148.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 65920/261597 [07:54<22:23, 145.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 66048/261597 [07:54<22:06, 147.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 66176/261597 [07:55<22:14, 146.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 66304/261597 [07:56<21:57, 148.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 66432/261597 [07:57<22:14, 146.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 66560/261597 [07:58<22:36, 143.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  25%|██▌       | 66688/261597 [07:59<22:18, 145.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 66816/261597 [08:00<22:05, 146.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 66944/261597 [08:01<22:13, 145.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 67072/261597 [08:01<22:11, 146.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 67200/261597 [08:02<22:12, 145.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 67328/261597 [08:03<22:21, 144.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 67456/261597 [08:04<21:57, 147.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 67584/261597 [08:05<22:21, 144.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 67712/261597 [08:06<22:25, 144.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 67840/261597 [08:07<22:30, 143.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 67968/261597 [08:08<22:07, 145.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 68096/261597 [08:08<22:09, 145.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 68224/261597 [08:09<21:37, 149.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 68352/261597 [08:10<21:52, 147.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 68480/261597 [08:11<22:15, 144.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▌       | 68608/261597 [08:12<22:12, 144.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▋       | 68736/261597 [08:13<22:16, 144.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▋       | 68864/261597 [08:14<22:29, 142.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▋       | 68992/261597 [08:15<22:46, 140.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▋       | 69120/261597 [08:16<22:21, 143.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  26%|██▋       | 69248/261597 [08:16<21:54, 146.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 69376/261597 [08:17<21:50, 146.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 69504/261597 [08:18<21:33, 148.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 69632/261597 [08:19<21:46, 146.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 69760/261597 [08:20<21:41, 147.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 69888/261597 [08:21<21:57, 145.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 70016/261597 [08:22<21:34, 148.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 70144/261597 [08:23<21:35, 147.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 70272/261597 [08:23<21:19, 149.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 70400/261597 [08:24<21:27, 148.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 70528/261597 [08:25<21:40, 146.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 70656/261597 [08:26<21:56, 145.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 70784/261597 [08:27<22:04, 144.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 70912/261597 [08:28<21:54, 145.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 71040/261597 [08:29<21:48, 145.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 71168/261597 [08:30<21:43, 146.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 71296/261597 [08:30<21:18, 148.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 71424/261597 [08:31<21:15, 149.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 71552/261597 [08:32<21:14, 149.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 71680/261597 [08:33<21:05, 150.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 71808/261597 [08:34<21:04, 150.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  27%|██▋       | 71936/261597 [08:35<21:08, 149.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 72064/261597 [08:35<20:55, 150.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 72192/261597 [08:36<21:04, 149.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 72320/261597 [08:37<20:47, 151.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 72448/261597 [08:38<20:48, 151.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 72576/261597 [08:39<21:35, 145.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 72704/261597 [08:40<21:22, 147.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 72832/261597 [08:41<21:35, 145.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 72960/261597 [08:42<21:35, 145.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 73088/261597 [08:42<21:03, 149.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 73216/261597 [08:43<20:53, 150.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 73344/261597 [08:44<21:07, 148.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 73472/261597 [08:45<20:48, 150.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 73600/261597 [08:46<20:50, 150.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 73728/261597 [08:47<20:47, 150.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 73856/261597 [08:48<21:00, 149.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 73984/261597 [08:48<21:00, 148.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 74112/261597 [08:49<20:57, 149.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 74240/261597 [08:50<20:51, 149.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 74368/261597 [08:51<20:46, 150.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  28%|██▊       | 74496/261597 [08:52<20:47, 150.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▊       | 74624/261597 [08:53<20:51, 149.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▊       | 74752/261597 [08:53<20:34, 151.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▊       | 74880/261597 [08:54<20:54, 148.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▊       | 75008/261597 [08:55<20:34, 151.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▊       | 75136/261597 [08:56<20:36, 150.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 75264/261597 [08:57<20:46, 149.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 75392/261597 [08:58<21:10, 146.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 75520/261597 [08:59<21:07, 146.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 75648/261597 [09:00<21:09, 146.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 75776/261597 [09:00<21:12, 146.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 75904/261597 [09:01<21:10, 146.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 76032/261597 [09:02<20:58, 147.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 76160/261597 [09:03<21:08, 146.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 76288/261597 [09:04<20:54, 147.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 76416/261597 [09:05<20:31, 150.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 76544/261597 [09:06<20:25, 151.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 76672/261597 [09:06<20:15, 152.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 76800/261597 [09:07<20:45, 148.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 76928/261597 [09:08<20:35, 149.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  29%|██▉       | 77056/261597 [09:09<20:28, 150.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 77184/261597 [09:10<20:26, 150.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 77312/261597 [09:11<20:17, 151.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 77440/261597 [09:11<19:56, 153.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 77568/261597 [09:12<20:00, 153.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 77696/261597 [09:13<20:00, 153.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 77824/261597 [09:14<20:13, 151.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 77952/261597 [09:15<20:28, 149.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 78080/261597 [09:16<20:29, 149.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 78208/261597 [09:17<20:32, 148.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 78336/261597 [09:17<20:23, 149.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|██▉       | 78464/261597 [09:18<19:58, 152.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 78592/261597 [09:19<20:26, 149.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 78720/261597 [09:20<20:17, 150.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 78848/261597 [09:21<20:11, 150.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 78976/261597 [09:22<20:40, 147.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 79104/261597 [09:23<20:38, 147.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 79232/261597 [09:23<20:15, 150.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 79360/261597 [09:24<20:13, 150.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 79488/261597 [09:25<20:19, 149.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 79616/261597 [09:26<20:31, 147.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  30%|███       | 79744/261597 [09:27<20:49, 145.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 79872/261597 [09:28<20:24, 148.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 80000/261597 [09:29<19:55, 151.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 80128/261597 [09:29<19:50, 152.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 80256/261597 [09:30<20:04, 150.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 80384/261597 [09:31<20:05, 150.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 80512/261597 [09:32<19:59, 150.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 80640/261597 [09:33<20:03, 150.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 80768/261597 [09:34<22:50, 131.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 80896/261597 [09:35<21:56, 137.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 81024/261597 [09:36<21:16, 141.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 81152/261597 [09:37<21:21, 140.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 81280/261597 [09:38<20:54, 143.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 81408/261597 [09:38<20:46, 144.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 81536/261597 [09:39<20:37, 145.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███       | 81664/261597 [09:40<20:38, 145.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███▏      | 81792/261597 [09:41<20:46, 144.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███▏      | 81920/261597 [09:42<20:45, 144.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███▏      | 82048/261597 [09:43<20:44, 144.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███▏      | 82176/261597 [09:44<20:33, 145.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  31%|███▏      | 82304/261597 [09:45<20:38, 144.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 82432/261597 [09:45<20:12, 147.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 82560/261597 [09:46<20:20, 146.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 82688/261597 [09:47<19:51, 150.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 82816/261597 [09:48<19:53, 149.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 82944/261597 [09:49<19:39, 151.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 83072/261597 [09:50<19:27, 152.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 83200/261597 [09:50<19:44, 150.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 83328/261597 [09:51<19:46, 150.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 83456/261597 [09:52<19:38, 151.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 83584/261597 [09:53<19:18, 153.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 83712/261597 [09:54<19:20, 153.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 83840/261597 [09:55<19:38, 150.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 83968/261597 [09:56<19:40, 150.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 84096/261597 [09:56<19:33, 151.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 84224/261597 [09:57<19:55, 148.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 84352/261597 [09:58<19:41, 150.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 84480/261597 [09:59<20:07, 146.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 84608/261597 [10:00<19:40, 149.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 84736/261597 [10:01<20:03, 146.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 84864/261597 [10:02<19:47, 148.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  32%|███▏      | 84992/261597 [10:02<19:53, 147.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 85120/261597 [10:03<19:52, 147.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 85248/261597 [10:04<19:47, 148.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 85376/261597 [10:05<19:28, 150.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 85504/261597 [10:06<19:22, 151.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 85632/261597 [10:07<19:41, 148.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 85760/261597 [10:08<20:00, 146.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 85888/261597 [10:09<20:04, 145.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 86016/261597 [10:09<19:47, 147.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 86144/261597 [10:10<19:47, 147.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 86272/261597 [10:11<19:46, 147.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 86400/261597 [10:12<19:19, 151.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 86528/261597 [10:13<19:22, 150.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 86656/261597 [10:14<19:04, 152.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 86784/261597 [10:14<19:04, 152.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 86912/261597 [10:15<18:59, 153.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 87040/261597 [10:16<18:59, 153.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 87168/261597 [10:17<19:06, 152.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 87296/261597 [10:18<19:13, 151.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 87424/261597 [10:19<19:07, 151.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  33%|███▎      | 87552/261597 [10:19<19:17, 150.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▎      | 87680/261597 [10:20<19:10, 151.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▎      | 87808/261597 [10:21<18:59, 152.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▎      | 87936/261597 [10:22<18:40, 154.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▎      | 88064/261597 [10:23<18:43, 154.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▎      | 88192/261597 [10:24<19:06, 151.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 88320/261597 [10:25<19:07, 150.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 88448/261597 [10:25<19:05, 151.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 88576/261597 [10:26<19:13, 149.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 88704/261597 [10:27<19:11, 150.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 88832/261597 [10:28<19:34, 147.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 88960/261597 [10:29<19:38, 146.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 89088/261597 [10:30<19:09, 150.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 89216/261597 [10:31<19:14, 149.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 89344/261597 [10:31<19:16, 148.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 89472/261597 [10:32<18:55, 151.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 89600/261597 [10:33<18:38, 153.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 89728/261597 [10:34<18:37, 153.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 89856/261597 [10:35<18:55, 151.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 89984/261597 [10:36<19:05, 149.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 90112/261597 [10:36<19:10, 149.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  34%|███▍      | 90240/261597 [10:37<19:00, 150.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 90368/261597 [10:38<19:09, 148.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 90496/261597 [10:39<19:01, 149.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 90624/261597 [10:40<19:13, 148.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 90752/261597 [10:41<19:14, 147.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 90880/261597 [10:42<19:25, 146.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 91008/261597 [10:42<18:54, 150.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 91136/261597 [10:43<18:49, 150.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 91264/261597 [10:44<18:46, 151.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 91392/261597 [10:45<19:19, 146.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▍      | 91520/261597 [10:46<18:52, 150.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 91648/261597 [10:47<18:37, 152.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 91776/261597 [10:48<18:34, 152.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 91904/261597 [10:48<18:50, 150.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 92032/261597 [10:49<18:53, 149.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 92160/261597 [10:50<18:49, 150.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 92288/261597 [10:51<18:33, 152.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 92416/261597 [10:52<18:20, 153.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 92544/261597 [10:53<18:28, 152.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 92672/261597 [10:53<18:21, 153.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  35%|███▌      | 92800/261597 [10:54<18:11, 154.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 92928/261597 [10:55<18:36, 151.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 93056/261597 [10:56<19:01, 147.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 93184/261597 [10:57<19:00, 147.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 93312/261597 [10:58<18:53, 148.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 93440/261597 [10:59<18:46, 149.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 93568/261597 [10:59<18:34, 150.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 93696/261597 [11:00<18:20, 152.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 93824/261597 [11:01<18:03, 154.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 93952/261597 [11:02<18:22, 152.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 94080/261597 [11:03<18:27, 151.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 94208/261597 [11:04<18:34, 150.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 94336/261597 [11:05<18:27, 151.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 94464/261597 [11:05<18:40, 149.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 94592/261597 [11:06<18:35, 149.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▌      | 94720/261597 [11:07<18:35, 149.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▋      | 94848/261597 [11:08<18:35, 149.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▋      | 94976/261597 [11:09<18:32, 149.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▋      | 95104/261597 [11:10<18:29, 150.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▋      | 95232/261597 [11:10<18:11, 152.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  36%|███▋      | 95360/261597 [11:11<18:32, 149.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 95488/261597 [11:12<18:17, 151.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 95616/261597 [11:13<18:05, 152.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 95744/261597 [11:14<18:12, 151.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 95872/261597 [11:15<18:09, 152.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 96000/261597 [11:16<18:12, 151.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 96128/261597 [11:16<18:36, 148.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 96256/261597 [11:17<18:17, 150.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 96384/261597 [11:18<18:03, 152.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 96512/261597 [11:19<18:03, 152.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 96640/261597 [11:20<17:38, 155.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 96768/261597 [11:21<17:46, 154.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 96896/261597 [11:21<17:48, 154.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 97024/261597 [11:22<17:49, 153.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 97152/261597 [11:23<17:52, 153.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 97280/261597 [11:24<18:03, 151.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 97408/261597 [11:25<18:08, 150.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 97536/261597 [11:26<17:56, 152.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 97664/261597 [11:26<18:03, 151.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 97792/261597 [11:27<18:15, 149.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 97920/261597 [11:28<17:55, 152.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  37%|███▋      | 98048/261597 [11:29<17:46, 153.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 98176/261597 [11:30<17:48, 152.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 98304/261597 [11:31<17:44, 153.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 98432/261597 [11:31<17:31, 155.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 98560/261597 [11:32<17:40, 153.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 98688/261597 [11:33<17:32, 154.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 98816/261597 [11:34<17:21, 156.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 98944/261597 [11:35<18:05, 149.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 99072/261597 [11:36<17:54, 151.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 99200/261597 [11:37<18:31, 146.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 99328/261597 [11:38<18:35, 145.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 99456/261597 [11:38<18:18, 147.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 99584/261597 [11:39<17:58, 150.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 99712/261597 [11:40<17:45, 151.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 99840/261597 [11:41<17:24, 154.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 99968/261597 [11:42<17:32, 153.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 100096/261597 [11:42<17:21, 155.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 100224/261597 [11:43<17:21, 154.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 100352/261597 [11:44<17:39, 152.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 100480/261597 [11:45<17:50, 150.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  38%|███▊      | 100608/261597 [11:46<17:43, 151.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▊      | 100736/261597 [11:47<17:51, 150.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▊      | 100864/261597 [11:48<17:40, 151.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▊      | 100992/261597 [11:48<17:34, 152.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▊      | 101120/261597 [11:49<17:36, 151.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▊      | 101248/261597 [11:50<17:39, 151.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 101376/261597 [11:51<17:29, 152.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 101504/261597 [11:52<17:09, 155.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 101632/261597 [11:53<17:10, 155.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 101760/261597 [11:53<17:15, 154.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 101888/261597 [11:54<17:17, 153.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 102016/261597 [11:55<17:17, 153.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 102144/261597 [11:56<17:06, 155.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 102272/261597 [11:57<17:12, 154.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 102400/261597 [11:57<17:06, 155.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 102528/261597 [11:58<17:02, 155.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 102656/261597 [11:59<17:07, 154.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 102784/261597 [12:00<17:16, 153.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 102912/261597 [12:01<17:28, 151.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 103040/261597 [12:02<17:38, 149.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 103168/261597 [12:03<17:16, 152.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  39%|███▉      | 103296/261597 [12:03<17:05, 154.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 103424/261597 [12:04<17:12, 153.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 103552/261597 [12:05<17:36, 149.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 103680/261597 [12:06<17:29, 150.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 103808/261597 [12:07<17:34, 149.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 103936/261597 [12:08<17:34, 149.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 104064/261597 [12:09<17:38, 148.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 104192/261597 [12:09<17:31, 149.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 104320/261597 [12:10<17:09, 152.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 104448/261597 [12:11<17:12, 152.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|███▉      | 104576/261597 [12:12<16:58, 154.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 104704/261597 [12:13<16:55, 154.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 104832/261597 [12:13<17:03, 153.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 104960/261597 [12:14<17:08, 152.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 105088/261597 [12:15<16:24, 158.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 105216/261597 [12:16<16:20, 159.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 105344/261597 [12:17<16:43, 155.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 105472/261597 [12:18<16:42, 155.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 105600/261597 [12:18<16:50, 154.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 105728/261597 [12:19<16:39, 156.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  40%|████      | 105856/261597 [12:20<16:44, 155.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 105984/261597 [12:21<16:48, 154.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 106112/261597 [12:22<16:42, 155.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 106240/261597 [12:23<16:34, 156.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 106368/261597 [12:23<16:52, 153.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 106496/261597 [12:24<16:34, 155.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 106624/261597 [12:25<16:38, 155.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 106752/261597 [12:26<16:41, 154.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 106880/261597 [12:27<16:56, 152.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 107008/261597 [12:28<16:53, 152.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 107136/261597 [12:28<16:55, 152.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 107264/261597 [12:29<17:07, 150.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 107392/261597 [12:30<16:51, 152.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 107520/261597 [12:31<16:42, 153.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 107648/261597 [12:32<16:43, 153.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 107776/261597 [12:33<16:42, 153.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████      | 107904/261597 [12:33<16:51, 151.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████▏     | 108032/261597 [12:34<16:44, 152.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████▏     | 108160/261597 [12:35<17:00, 150.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████▏     | 108288/261597 [12:36<17:17, 147.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████▏     | 108416/261597 [12:37<17:01, 149.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  41%|████▏     | 108544/261597 [12:38<17:06, 149.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 108672/261597 [12:39<16:57, 150.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 108800/261597 [12:39<16:45, 151.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 108928/261597 [12:40<16:52, 150.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 109056/261597 [12:41<16:59, 149.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 109184/261597 [12:42<16:53, 150.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 109312/261597 [12:43<16:36, 152.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 109440/261597 [12:44<16:40, 152.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 109568/261597 [12:44<16:18, 155.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 109696/261597 [12:45<16:15, 155.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 109824/261597 [12:46<16:22, 154.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 109952/261597 [12:47<16:47, 150.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 110080/261597 [12:48<16:53, 149.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 110208/261597 [12:49<16:37, 151.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 110336/261597 [12:49<16:20, 154.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 110464/261597 [12:50<16:25, 153.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 110592/261597 [12:51<16:10, 155.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 110720/261597 [12:52<16:07, 155.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 110848/261597 [12:53<16:02, 156.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 110976/261597 [12:54<16:06, 155.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  42%|████▏     | 111104/261597 [12:54<15:56, 157.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 111232/261597 [12:55<16:04, 155.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 111360/261597 [12:56<16:19, 153.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 111488/261597 [12:57<16:18, 153.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 111616/261597 [12:58<16:01, 156.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 111744/261597 [12:58<16:03, 155.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 111872/261597 [12:59<15:55, 156.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 112000/261597 [13:00<16:03, 155.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 112128/261597 [13:01<16:03, 155.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 112256/261597 [13:02<16:05, 154.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 112384/261597 [13:03<15:58, 155.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 112512/261597 [13:03<16:20, 152.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 112640/261597 [13:04<16:18, 152.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 112768/261597 [13:05<16:10, 153.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 112896/261597 [13:06<16:15, 152.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 113024/261597 [13:07<16:20, 151.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 113152/261597 [13:08<16:21, 151.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 113280/261597 [13:09<16:02, 154.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 113408/261597 [13:09<16:21, 151.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 113536/261597 [13:10<16:18, 151.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 113664/261597 [13:11<15:57, 154.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  43%|████▎     | 113792/261597 [13:12<16:06, 152.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▎     | 113920/261597 [13:13<15:56, 154.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▎     | 114048/261597 [13:14<16:04, 152.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▎     | 114176/261597 [13:14<16:35, 148.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▎     | 114304/261597 [13:15<16:28, 148.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▎     | 114432/261597 [13:16<16:12, 151.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 114560/261597 [13:17<16:14, 150.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 114688/261597 [13:18<15:57, 153.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 114816/261597 [13:19<15:45, 155.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 114944/261597 [13:19<15:44, 155.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 115072/261597 [13:20<15:41, 155.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 115200/261597 [13:21<15:42, 155.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 115328/261597 [13:22<15:46, 154.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 115456/261597 [13:23<15:53, 153.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 115584/261597 [13:24<15:40, 155.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 115712/261597 [13:24<15:41, 155.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 115840/261597 [13:25<15:33, 156.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 115968/261597 [13:26<15:38, 155.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 116096/261597 [13:27<15:39, 154.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 116224/261597 [13:28<15:41, 154.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  44%|████▍     | 116352/261597 [13:29<15:36, 155.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 116480/261597 [13:29<15:53, 152.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 116608/261597 [13:30<15:31, 155.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 116736/261597 [13:31<15:18, 157.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 116864/261597 [13:32<15:04, 159.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 116992/261597 [13:33<15:12, 158.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 117120/261597 [13:33<15:09, 158.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 117248/261597 [13:34<15:17, 157.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 117376/261597 [13:35<15:29, 155.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 117504/261597 [13:36<15:16, 157.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▍     | 117632/261597 [13:37<15:05, 159.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 117760/261597 [13:37<15:09, 158.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 117888/261597 [13:38<15:11, 157.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 118016/261597 [13:39<15:21, 155.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 118144/261597 [13:40<15:20, 155.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 118272/261597 [13:41<15:08, 157.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 118400/261597 [13:42<15:11, 157.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 118528/261597 [13:42<15:09, 157.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 118656/261597 [13:43<15:07, 157.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 118784/261597 [13:44<15:12, 156.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  45%|████▌     | 118912/261597 [13:45<15:20, 154.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 119040/261597 [13:46<15:05, 157.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 119168/261597 [13:46<15:22, 154.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 119296/261597 [13:47<15:18, 154.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 119424/261597 [13:48<15:39, 151.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 119552/261597 [13:49<15:42, 150.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 119680/261597 [13:50<15:17, 154.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 119808/261597 [13:51<15:18, 154.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 119936/261597 [13:51<15:09, 155.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 120064/261597 [13:52<15:09, 155.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 120192/261597 [13:53<15:07, 155.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 120320/261597 [13:54<15:28, 152.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 120448/261597 [13:55<15:36, 150.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 120576/261597 [13:56<15:21, 153.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 120704/261597 [13:56<15:16, 153.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 120832/261597 [13:57<15:10, 154.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▌     | 120960/261597 [13:58<15:12, 154.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▋     | 121088/261597 [13:59<14:55, 156.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▋     | 121216/261597 [14:00<14:53, 157.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▋     | 121344/261597 [14:01<14:56, 156.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▋     | 121472/261597 [14:01<14:55, 156.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  46%|████▋     | 121600/261597 [14:02<14:48, 157.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 121728/261597 [14:03<14:44, 158.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 121856/261597 [14:04<14:54, 156.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 121984/261597 [14:05<15:06, 153.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 122112/261597 [14:05<14:54, 155.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 122240/261597 [14:06<15:17, 151.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 122368/261597 [14:07<15:31, 149.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 122496/261597 [14:08<15:18, 151.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 122624/261597 [14:09<15:11, 152.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 122752/261597 [14:10<15:04, 153.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 122880/261597 [14:11<15:02, 153.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 123008/261597 [14:11<15:06, 152.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 123136/261597 [14:12<15:03, 153.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 123264/261597 [14:13<14:57, 154.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 123392/261597 [14:14<14:59, 153.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 123520/261597 [14:15<14:58, 153.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 123648/261597 [14:15<14:41, 156.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 123776/261597 [14:16<14:47, 155.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 123904/261597 [14:17<14:49, 154.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 124032/261597 [14:18<14:39, 156.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  47%|████▋     | 124160/261597 [14:19<14:27, 158.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 124288/261597 [14:20<14:41, 155.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 124416/261597 [14:21<15:05, 151.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 124544/261597 [14:21<14:42, 155.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 124672/261597 [14:22<14:32, 156.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 124800/261597 [14:23<14:22, 158.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 124928/261597 [14:24<14:35, 156.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 125056/261597 [14:24<14:22, 158.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 125184/261597 [14:25<14:27, 157.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 125312/261597 [14:26<14:16, 159.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 125440/261597 [14:27<14:25, 157.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 125568/261597 [14:28<14:49, 152.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 125696/261597 [14:29<14:42, 154.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 125824/261597 [14:30<14:57, 151.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 125952/261597 [14:30<14:46, 153.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 126080/261597 [14:31<14:33, 155.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 126208/261597 [14:32<14:43, 153.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 126336/261597 [14:33<14:33, 154.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 126464/261597 [14:34<14:29, 155.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 126592/261597 [14:34<14:43, 152.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 126720/261597 [14:35<14:24, 155.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  48%|████▊     | 126848/261597 [14:36<14:18, 156.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▊     | 126976/261597 [14:37<14:12, 157.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▊     | 127104/261597 [14:38<14:10, 158.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▊     | 127232/261597 [14:39<14:13, 157.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▊     | 127360/261597 [14:39<14:08, 158.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▊     | 127488/261597 [14:40<14:02, 159.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 127616/261597 [14:41<13:56, 160.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 127744/261597 [14:42<14:08, 157.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 127872/261597 [14:43<14:24, 154.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 128000/261597 [14:43<14:43, 151.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 128128/261597 [14:44<14:35, 152.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 128256/261597 [14:45<14:30, 153.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 128384/261597 [14:46<14:40, 151.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 128512/261597 [14:47<14:36, 151.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 128640/261597 [14:48<15:06, 146.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 128768/261597 [14:49<15:20, 144.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 128896/261597 [14:50<15:37, 141.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 129024/261597 [14:50<14:57, 147.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 129152/261597 [14:51<14:45, 149.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 129280/261597 [14:52<14:58, 147.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  49%|████▉     | 129408/261597 [14:53<14:52, 148.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 129536/261597 [14:54<14:35, 150.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 129664/261597 [14:55<14:51, 148.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 129792/261597 [14:56<15:14, 144.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 129920/261597 [14:57<15:07, 145.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 130048/261597 [14:57<14:58, 146.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 130176/261597 [14:58<14:32, 150.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 130304/261597 [14:59<14:39, 149.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 130432/261597 [15:00<14:53, 146.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 130560/261597 [15:01<15:08, 144.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|████▉     | 130688/261597 [15:02<14:48, 147.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 130816/261597 [15:03<14:30, 150.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 130944/261597 [15:03<14:14, 152.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 131072/261597 [15:04<14:04, 154.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 131200/261597 [15:05<13:51, 156.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 131328/261597 [15:06<13:38, 159.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 131456/261597 [15:07<13:58, 155.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 131584/261597 [15:08<14:28, 149.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 131712/261597 [15:08<14:20, 150.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 131840/261597 [15:09<14:15, 151.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 131968/261597 [15:10<14:03, 153.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  50%|█████     | 132096/261597 [15:11<13:51, 155.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 132224/261597 [15:12<14:14, 151.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 132352/261597 [15:12<14:05, 152.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 132480/261597 [15:13<13:57, 154.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 132608/261597 [15:14<13:45, 156.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 132736/261597 [15:15<13:42, 156.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 132864/261597 [15:16<14:15, 150.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 132992/261597 [15:17<14:18, 149.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 133120/261597 [15:18<14:23, 148.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 133248/261597 [15:18<14:20, 149.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 133376/261597 [15:19<14:00, 152.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 133504/261597 [15:20<14:31, 147.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 133632/261597 [15:21<14:15, 149.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 133760/261597 [15:22<14:14, 149.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 133888/261597 [15:23<14:02, 151.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████     | 134016/261597 [15:23<13:51, 153.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████▏    | 134144/261597 [15:24<14:02, 151.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████▏    | 134272/261597 [15:25<13:55, 152.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████▏    | 134400/261597 [15:26<14:08, 149.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████▏    | 134528/261597 [15:27<14:09, 149.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  51%|█████▏    | 134656/261597 [15:28<14:07, 149.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 134784/261597 [15:29<13:53, 152.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 134912/261597 [15:30<14:26, 146.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 135040/261597 [15:30<14:17, 147.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 135168/261597 [15:31<14:25, 146.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 135296/261597 [15:32<14:37, 143.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 135424/261597 [15:33<14:26, 145.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 135552/261597 [15:34<13:51, 151.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 135680/261597 [15:35<13:47, 152.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 135808/261597 [15:35<13:41, 153.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 135936/261597 [15:36<14:11, 147.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 136064/261597 [15:37<14:47, 141.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 136192/261597 [15:38<15:19, 136.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 136320/261597 [15:39<15:08, 137.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 136448/261597 [15:40<15:10, 137.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 136576/261597 [15:41<15:43, 132.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 136704/261597 [15:42<16:04, 129.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 136832/261597 [15:43<16:20, 127.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 136960/261597 [15:44<15:24, 134.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 137088/261597 [15:45<14:52, 139.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  52%|█████▏    | 137216/261597 [15:46<14:13, 145.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 137344/261597 [15:47<13:55, 148.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 137472/261597 [15:48<13:49, 149.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 137600/261597 [15:48<13:34, 152.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 137728/261597 [15:49<13:19, 154.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 137856/261597 [15:50<13:18, 154.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 137984/261597 [15:51<13:26, 153.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 138112/261597 [15:52<13:37, 151.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 138240/261597 [15:53<13:50, 148.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 138368/261597 [15:53<13:46, 149.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 138496/261597 [15:54<14:01, 146.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 138624/261597 [15:55<14:12, 144.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 138752/261597 [15:56<13:53, 147.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 138880/261597 [15:57<13:51, 147.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 139008/261597 [15:58<13:58, 146.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 139136/261597 [15:59<14:33, 140.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 139264/261597 [16:00<15:17, 133.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 139392/261597 [16:01<15:03, 135.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 139520/261597 [16:02<15:16, 133.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 139648/261597 [16:03<14:40, 138.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 139776/261597 [16:04<14:43, 137.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  53%|█████▎    | 139904/261597 [16:04<14:05, 144.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▎    | 140032/261597 [16:05<13:52, 145.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▎    | 140160/261597 [16:06<13:50, 146.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▎    | 140288/261597 [16:07<14:11, 142.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▎    | 140416/261597 [16:08<13:58, 144.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▎    | 140544/261597 [16:09<14:08, 142.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 140672/261597 [16:10<14:07, 142.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 140800/261597 [16:11<14:20, 140.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 140928/261597 [16:12<14:16, 140.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 141056/261597 [16:12<14:17, 140.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 141184/261597 [16:13<14:22, 139.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 141312/261597 [16:14<14:02, 142.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 141440/261597 [16:15<13:56, 143.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 141568/261597 [16:16<13:35, 147.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 141696/261597 [16:17<13:25, 148.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 141824/261597 [16:18<13:52, 143.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 141952/261597 [16:19<13:57, 142.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 142080/261597 [16:19<13:23, 148.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 142208/261597 [16:20<13:24, 148.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 142336/261597 [16:21<14:14, 139.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  54%|█████▍    | 142464/261597 [16:22<13:35, 146.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 142592/261597 [16:23<13:57, 142.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 142720/261597 [16:24<13:37, 145.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 142848/261597 [16:25<13:32, 146.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 142976/261597 [16:26<13:48, 143.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 143104/261597 [16:27<13:50, 142.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 143232/261597 [16:28<13:43, 143.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 143360/261597 [16:28<13:37, 144.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 143488/261597 [16:29<13:28, 146.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 143616/261597 [16:30<13:26, 146.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 143744/261597 [16:31<13:27, 145.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▍    | 143872/261597 [16:32<13:11, 148.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 144000/261597 [16:33<13:36, 144.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 144128/261597 [16:34<13:54, 140.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 144256/261597 [16:35<13:38, 143.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 144384/261597 [16:35<13:31, 144.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 144512/261597 [16:36<13:30, 144.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 144640/261597 [16:37<14:07, 138.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 144768/261597 [16:38<14:01, 138.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 144896/261597 [16:39<14:03, 138.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 145024/261597 [16:40<13:25, 144.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  55%|█████▌    | 145152/261597 [16:41<13:09, 147.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 145280/261597 [16:42<13:04, 148.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 145408/261597 [16:43<13:26, 144.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 145536/261597 [16:44<13:34, 142.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 145664/261597 [16:44<13:19, 144.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 145792/261597 [16:45<13:33, 142.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 145920/261597 [16:46<13:21, 144.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 146048/261597 [16:47<13:15, 145.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 146176/261597 [16:48<13:21, 144.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 146304/261597 [16:49<13:41, 140.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 146432/261597 [16:50<13:50, 138.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 146560/261597 [16:51<13:26, 142.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 146688/261597 [16:52<13:36, 140.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 146816/261597 [16:53<13:46, 138.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 146944/261597 [16:53<13:10, 145.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▌    | 147072/261597 [16:54<13:09, 144.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▋    | 147200/261597 [16:55<13:04, 145.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▋    | 147328/261597 [16:56<12:44, 149.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▋    | 147456/261597 [16:57<12:33, 151.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▋    | 147584/261597 [16:58<12:22, 153.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  56%|█████▋    | 147712/261597 [16:58<12:12, 155.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 147840/261597 [16:59<12:49, 147.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 147968/261597 [17:00<12:27, 151.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 148096/261597 [17:01<12:15, 154.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 148224/261597 [17:02<12:43, 148.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 148352/261597 [17:03<12:42, 148.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 148480/261597 [17:04<12:28, 151.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 148608/261597 [17:04<12:40, 148.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 148736/261597 [17:05<12:20, 152.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 148864/261597 [17:06<12:34, 149.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 148992/261597 [17:07<12:46, 146.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 149120/261597 [17:08<12:50, 146.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 149248/261597 [17:09<12:59, 144.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 149376/261597 [17:10<13:11, 141.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 149504/261597 [17:11<13:12, 141.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 149632/261597 [17:11<12:45, 146.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 149760/261597 [17:12<12:56, 144.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 149888/261597 [17:13<13:11, 141.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 150016/261597 [17:14<12:50, 144.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 150144/261597 [17:15<12:38, 146.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 150272/261597 [17:16<12:57, 143.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  57%|█████▋    | 150400/261597 [17:17<12:36, 146.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 150528/261597 [17:18<12:16, 150.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 150656/261597 [17:19<12:40, 145.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 150784/261597 [17:19<12:53, 143.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 150912/261597 [17:20<12:55, 142.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 151040/261597 [17:21<12:29, 147.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 151168/261597 [17:22<12:42, 144.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 151296/261597 [17:23<12:47, 143.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 151424/261597 [17:24<12:35, 145.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 151552/261597 [17:25<13:10, 139.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 151680/261597 [17:26<13:34, 134.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 151808/261597 [17:27<13:17, 137.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 151936/261597 [17:28<12:47, 142.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 152064/261597 [17:28<12:43, 143.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 152192/261597 [17:29<12:37, 144.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 152320/261597 [17:30<12:39, 143.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 152448/261597 [17:31<12:55, 140.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 152576/261597 [17:32<12:54, 140.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 152704/261597 [17:33<12:53, 140.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 152832/261597 [17:34<12:42, 142.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  58%|█████▊    | 152960/261597 [17:35<12:35, 143.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▊    | 153088/261597 [17:36<12:13, 147.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▊    | 153216/261597 [17:36<12:18, 146.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▊    | 153344/261597 [17:37<11:49, 152.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▊    | 153472/261597 [17:38<11:45, 153.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▊    | 153600/261597 [17:39<11:52, 151.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 153728/261597 [17:40<11:54, 150.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 153856/261597 [17:41<12:28, 143.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 153984/261597 [17:42<12:26, 144.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 154112/261597 [17:42<12:17, 145.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 154240/261597 [17:43<12:02, 148.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 154368/261597 [17:44<12:19, 145.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 154496/261597 [17:45<12:26, 143.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 154624/261597 [17:46<12:15, 145.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 154752/261597 [17:47<12:36, 141.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 154880/261597 [17:48<12:27, 142.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 155008/261597 [17:49<12:36, 140.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 155136/261597 [17:50<12:50, 138.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 155264/261597 [17:51<12:41, 139.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 155392/261597 [17:52<12:49, 137.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 155520/261597 [17:52<12:12, 144.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  59%|█████▉    | 155648/261597 [17:53<12:19, 143.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 155776/261597 [17:54<12:37, 139.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 155904/261597 [17:55<12:49, 137.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 156032/261597 [17:56<12:33, 140.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 156160/261597 [17:57<12:10, 144.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 156288/261597 [17:58<11:50, 148.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 156416/261597 [17:59<11:40, 150.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 156544/261597 [17:59<11:35, 150.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 156672/261597 [18:00<12:02, 145.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 156800/261597 [18:01<12:18, 141.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|█████▉    | 156928/261597 [18:02<12:00, 145.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 157056/261597 [18:03<12:09, 143.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 157184/261597 [18:04<12:16, 141.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 157312/261597 [18:05<12:02, 144.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 157440/261597 [18:06<12:31, 138.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 157568/261597 [18:07<12:32, 138.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 157696/261597 [18:08<12:05, 143.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 157824/261597 [18:08<11:42, 147.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 157952/261597 [18:09<11:44, 147.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 158080/261597 [18:10<11:52, 145.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  60%|██████    | 158208/261597 [18:11<11:49, 145.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 158336/261597 [18:12<12:26, 138.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 158464/261597 [18:13<12:43, 135.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 158592/261597 [18:14<12:28, 137.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 158720/261597 [18:15<12:08, 141.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 158848/261597 [18:16<12:05, 141.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 158976/261597 [18:17<12:22, 138.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 159104/261597 [18:18<12:24, 137.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 159232/261597 [18:19<12:27, 136.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 159360/261597 [18:19<11:58, 142.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 159488/261597 [18:20<11:23, 149.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 159616/261597 [18:21<11:02, 153.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 159744/261597 [18:22<11:03, 153.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 159872/261597 [18:23<11:19, 149.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 160000/261597 [18:24<11:22, 148.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████    | 160128/261597 [18:24<11:18, 149.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████▏   | 160256/261597 [18:25<11:13, 150.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████▏   | 160384/261597 [18:26<11:18, 149.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████▏   | 160512/261597 [18:27<11:24, 147.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████▏   | 160640/261597 [18:28<11:13, 149.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  61%|██████▏   | 160768/261597 [18:29<11:06, 151.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 160896/261597 [18:29<11:02, 152.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 161024/261597 [18:30<11:30, 145.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 161152/261597 [18:31<11:52, 141.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 161280/261597 [18:32<11:24, 146.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 161408/261597 [18:33<11:18, 147.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 161536/261597 [18:34<10:53, 153.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 161664/261597 [18:35<11:07, 149.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 161792/261597 [18:36<11:13, 148.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 161920/261597 [18:37<11:25, 145.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 162048/261597 [18:37<11:18, 146.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 162176/261597 [18:38<11:04, 149.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 162304/261597 [18:39<10:56, 151.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 162432/261597 [18:40<10:59, 150.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 162560/261597 [18:41<10:54, 151.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 162688/261597 [18:42<10:57, 150.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 162816/261597 [18:42<11:04, 148.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 162944/261597 [18:43<11:16, 145.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 163072/261597 [18:44<10:55, 150.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 163200/261597 [18:45<11:17, 145.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 163328/261597 [18:46<11:21, 144.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  62%|██████▏   | 163456/261597 [18:47<11:25, 143.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 163584/261597 [18:48<11:37, 140.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 163712/261597 [18:49<11:25, 142.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 163840/261597 [18:50<11:17, 144.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 163968/261597 [18:50<11:02, 147.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 164096/261597 [18:51<11:15, 144.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 164224/261597 [18:52<11:19, 143.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 164352/261597 [18:53<11:24, 141.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 164480/261597 [18:54<11:11, 144.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 164608/261597 [18:55<11:06, 145.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 164736/261597 [18:56<11:31, 140.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 164864/261597 [18:57<11:37, 138.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 164992/261597 [18:58<11:10, 144.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 165120/261597 [18:58<10:42, 150.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 165248/261597 [18:59<10:58, 146.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 165376/261597 [19:00<10:49, 148.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 165504/261597 [19:01<10:42, 149.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 165632/261597 [19:02<10:48, 147.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 165760/261597 [19:03<10:23, 153.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 165888/261597 [19:03<10:13, 156.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  63%|██████▎   | 166016/261597 [19:04<10:34, 150.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▎   | 166144/261597 [19:05<10:48, 147.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▎   | 166272/261597 [19:06<10:42, 148.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▎   | 166400/261597 [19:07<10:37, 149.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▎   | 166528/261597 [19:08<10:37, 149.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▎   | 166656/261597 [19:09<10:30, 150.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 166784/261597 [19:10<10:45, 146.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 166912/261597 [19:11<10:56, 144.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 167040/261597 [19:11<10:59, 143.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 167168/261597 [19:12<10:53, 144.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 167296/261597 [19:13<10:32, 149.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 167424/261597 [19:14<11:02, 142.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 167552/261597 [19:15<10:52, 144.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 167680/261597 [19:16<10:45, 145.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 167808/261597 [19:17<10:37, 147.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 167936/261597 [19:18<10:47, 144.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 168064/261597 [19:18<10:42, 145.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 168192/261597 [19:19<10:58, 141.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 168320/261597 [19:20<10:59, 141.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 168448/261597 [19:21<10:56, 141.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 168576/261597 [19:22<10:34, 146.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  64%|██████▍   | 168704/261597 [19:23<10:20, 149.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 168832/261597 [19:24<10:23, 148.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 168960/261597 [19:25<10:21, 149.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 169088/261597 [19:25<10:33, 146.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 169216/261597 [19:26<10:57, 140.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 169344/261597 [19:27<10:47, 142.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 169472/261597 [19:28<10:55, 140.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 169600/261597 [19:29<10:49, 141.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 169728/261597 [19:30<10:47, 141.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 169856/261597 [19:31<10:53, 140.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▍   | 169984/261597 [19:32<10:39, 143.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 170112/261597 [19:33<10:49, 140.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 170240/261597 [19:34<10:57, 138.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 170368/261597 [19:35<10:30, 144.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 170496/261597 [19:35<10:24, 145.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 170624/261597 [19:36<10:14, 148.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 170752/261597 [19:37<10:21, 146.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 170880/261597 [19:38<10:28, 144.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 171008/261597 [19:39<10:26, 144.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 171136/261597 [19:40<10:28, 143.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  65%|██████▌   | 171264/261597 [19:41<10:38, 141.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 171392/261597 [19:42<10:18, 145.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 171520/261597 [19:42<10:05, 148.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 171648/261597 [19:43<10:33, 141.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 171776/261597 [19:44<10:52, 137.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 171904/261597 [19:45<10:39, 140.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 172032/261597 [19:46<10:22, 143.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 172160/261597 [19:47<10:18, 144.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 172288/261597 [19:48<10:10, 146.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 172416/261597 [19:49<10:06, 147.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 172544/261597 [19:50<10:03, 147.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 172672/261597 [19:50<09:53, 149.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 172800/261597 [19:51<09:52, 149.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 172928/261597 [19:52<10:09, 145.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 173056/261597 [19:53<10:06, 145.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▌   | 173184/261597 [19:54<10:06, 145.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▋   | 173312/261597 [19:55<09:41, 151.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▋   | 173440/261597 [19:56<09:48, 149.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▋   | 173568/261597 [19:56<09:44, 150.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▋   | 173696/261597 [19:57<09:37, 152.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▋   | 173824/261597 [19:58<09:48, 149.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  66%|██████▋   | 173952/261597 [19:59<10:04, 144.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 174080/261597 [20:00<10:04, 144.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 174208/261597 [20:01<10:05, 144.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 174336/261597 [20:02<10:00, 145.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 174464/261597 [20:03<10:15, 141.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 174592/261597 [20:04<10:01, 144.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 174720/261597 [20:04<10:19, 140.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 174848/261597 [20:05<09:59, 144.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 174976/261597 [20:06<10:29, 137.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 175104/261597 [20:07<10:23, 138.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 175232/261597 [20:08<10:00, 143.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 175360/261597 [20:09<09:40, 148.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 175488/261597 [20:10<09:21, 153.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 175616/261597 [20:11<09:27, 151.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 175744/261597 [20:11<09:13, 155.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 175872/261597 [20:12<09:27, 151.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 176000/261597 [20:13<09:55, 143.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 176128/261597 [20:14<09:52, 144.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 176256/261597 [20:15<09:44, 145.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 176384/261597 [20:16<09:42, 146.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  67%|██████▋   | 176512/261597 [20:17<09:41, 146.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 176640/261597 [20:18<09:50, 143.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 176768/261597 [20:18<09:52, 143.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 176896/261597 [20:19<09:43, 145.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 177024/261597 [20:20<09:56, 141.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 177152/261597 [20:21<09:51, 142.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 177280/261597 [20:22<10:14, 137.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 177408/261597 [20:23<10:11, 137.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 177536/261597 [20:24<10:04, 139.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 177664/261597 [20:25<10:07, 138.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 177792/261597 [20:26<09:53, 141.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 177920/261597 [20:27<09:45, 142.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 178048/261597 [20:28<10:03, 138.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 178176/261597 [20:29<09:59, 139.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 178304/261597 [20:29<09:49, 141.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 178432/261597 [20:30<09:28, 146.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 178560/261597 [20:31<09:26, 146.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 178688/261597 [20:32<09:19, 148.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 178816/261597 [20:33<09:21, 147.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 178944/261597 [20:34<09:18, 147.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  68%|██████▊   | 179072/261597 [20:34<09:03, 151.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▊   | 179200/261597 [20:35<09:12, 149.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▊   | 179328/261597 [20:36<09:27, 145.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▊   | 179456/261597 [20:37<09:39, 141.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▊   | 179584/261597 [20:38<09:18, 146.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▊   | 179712/261597 [20:39<09:17, 146.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▊   | 179840/261597 [20:40<09:04, 150.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 179968/261597 [20:41<09:14, 147.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 180096/261597 [20:41<09:02, 150.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 180224/261597 [20:42<09:12, 147.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 180352/261597 [20:43<09:20, 145.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 180480/261597 [20:44<09:20, 144.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 180608/261597 [20:45<08:43, 154.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 180736/261597 [20:46<08:37, 156.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 180864/261597 [20:47<08:43, 154.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 180992/261597 [20:47<08:37, 155.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 181120/261597 [20:48<08:52, 151.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 181248/261597 [20:49<09:00, 148.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 181376/261597 [20:50<09:03, 147.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 181504/261597 [20:51<08:57, 148.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 181632/261597 [20:52<08:57, 148.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  69%|██████▉   | 181760/261597 [20:53<09:19, 142.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 181888/261597 [20:54<09:28, 140.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 182016/261597 [20:55<09:37, 137.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 182144/261597 [20:55<09:16, 142.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 182272/261597 [20:56<08:57, 147.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 182400/261597 [20:57<09:04, 145.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 182528/261597 [20:58<09:13, 142.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 182656/261597 [20:59<08:52, 148.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 182784/261597 [21:00<08:40, 151.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 182912/261597 [21:01<08:59, 145.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|██████▉   | 183040/261597 [21:02<08:57, 146.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 183168/261597 [21:02<08:44, 149.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 183296/261597 [21:03<08:58, 145.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 183424/261597 [21:04<09:07, 142.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 183552/261597 [21:05<09:06, 142.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 183680/261597 [21:06<08:49, 147.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 183808/261597 [21:07<08:47, 147.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 183936/261597 [21:08<09:06, 142.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 184064/261597 [21:09<09:01, 143.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 184192/261597 [21:10<09:11, 140.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  70%|███████   | 184320/261597 [21:10<09:13, 139.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 184448/261597 [21:11<08:53, 144.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 184576/261597 [21:12<08:58, 142.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 184704/261597 [21:13<08:57, 142.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 184832/261597 [21:14<09:07, 140.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 184960/261597 [21:15<09:21, 136.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 185088/261597 [21:16<09:18, 136.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 185216/261597 [21:17<08:57, 142.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 185344/261597 [21:18<08:52, 143.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 185472/261597 [21:18<08:33, 148.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 185600/261597 [21:19<08:24, 150.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 185728/261597 [21:20<08:23, 150.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 185856/261597 [21:21<08:43, 144.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 185984/261597 [21:22<08:48, 143.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 186112/261597 [21:23<08:47, 143.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 186240/261597 [21:24<08:56, 140.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████   | 186368/261597 [21:25<08:59, 139.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████▏  | 186496/261597 [21:26<08:59, 139.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████▏  | 186624/261597 [21:27<08:39, 144.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████▏  | 186752/261597 [21:27<08:36, 144.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████▏  | 186880/261597 [21:28<08:34, 145.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  71%|███████▏  | 187008/261597 [21:29<08:30, 146.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 187136/261597 [21:30<08:18, 149.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 187264/261597 [21:31<08:23, 147.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 187392/261597 [21:32<08:24, 147.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 187520/261597 [21:33<08:28, 145.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 187648/261597 [21:34<08:59, 137.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 187776/261597 [21:35<08:42, 141.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 187904/261597 [21:35<08:28, 144.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 188032/261597 [21:36<08:37, 142.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 188160/261597 [21:37<08:44, 140.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 188288/261597 [21:38<08:40, 140.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 188416/261597 [21:39<08:44, 139.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 188544/261597 [21:40<08:38, 140.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 188672/261597 [21:41<08:31, 142.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 188800/261597 [21:42<08:20, 145.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 188928/261597 [21:43<08:18, 145.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 189056/261597 [21:43<08:24, 143.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 189184/261597 [21:44<08:16, 145.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 189312/261597 [21:45<08:22, 143.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 189440/261597 [21:46<08:10, 147.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  72%|███████▏  | 189568/261597 [21:47<08:17, 144.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 189696/261597 [21:48<08:09, 146.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 189824/261597 [21:49<08:04, 147.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 189952/261597 [21:50<08:13, 145.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 190080/261597 [21:50<07:55, 150.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 190208/261597 [21:51<08:13, 144.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 190336/261597 [21:52<08:02, 147.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 190464/261597 [21:53<07:49, 151.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 190592/261597 [21:54<07:32, 156.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 190720/261597 [21:55<07:40, 153.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 190848/261597 [21:55<07:52, 149.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 190976/261597 [21:56<07:46, 151.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 191104/261597 [21:57<07:52, 149.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 191232/261597 [21:58<07:43, 151.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 191360/261597 [21:59<07:41, 152.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 191488/261597 [22:00<07:49, 149.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 191616/261597 [22:01<07:41, 151.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 191744/261597 [22:01<07:52, 147.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 191872/261597 [22:03<08:19, 139.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 192000/261597 [22:03<08:15, 140.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 192128/261597 [22:04<07:59, 144.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  73%|███████▎  | 192256/261597 [22:05<07:56, 145.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▎  | 192384/261597 [22:06<07:48, 147.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▎  | 192512/261597 [22:07<07:51, 146.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▎  | 192640/261597 [22:08<07:55, 145.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▎  | 192768/261597 [22:09<07:53, 145.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▎  | 192896/261597 [22:09<07:39, 149.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 193024/261597 [22:10<07:18, 156.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 193152/261597 [22:11<07:32, 151.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 193280/261597 [22:12<07:51, 144.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 193408/261597 [22:13<07:39, 148.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 193536/261597 [22:14<07:57, 142.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 193664/261597 [22:15<07:59, 141.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 193792/261597 [22:16<07:40, 147.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 193920/261597 [22:16<07:48, 144.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 194048/261597 [22:17<07:42, 146.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 194176/261597 [22:18<07:45, 144.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 194304/261597 [22:19<07:43, 145.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 194432/261597 [22:20<07:35, 147.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 194560/261597 [22:21<07:34, 147.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 194688/261597 [22:22<07:57, 140.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  74%|███████▍  | 194816/261597 [22:23<08:12, 135.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 194944/261597 [22:24<08:11, 135.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 195072/261597 [22:25<07:52, 140.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 195200/261597 [22:25<07:50, 141.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 195328/261597 [22:26<07:55, 139.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 195456/261597 [22:27<07:53, 139.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 195584/261597 [22:28<08:00, 137.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 195712/261597 [22:29<07:57, 137.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 195840/261597 [22:30<07:52, 139.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 195968/261597 [22:31<07:34, 144.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▍  | 196096/261597 [22:32<07:38, 142.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 196224/261597 [22:33<07:53, 138.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 196352/261597 [22:34<08:04, 134.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 196480/261597 [22:35<08:02, 134.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 196608/261597 [22:36<07:37, 142.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 196736/261597 [22:36<07:22, 146.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 196864/261597 [22:37<07:17, 147.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 196992/261597 [22:38<07:09, 150.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 197120/261597 [22:39<07:19, 146.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 197248/261597 [22:40<07:04, 151.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 197376/261597 [22:41<07:05, 151.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  75%|███████▌  | 197504/261597 [22:41<07:05, 150.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 197632/261597 [22:42<06:58, 152.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 197760/261597 [22:43<07:00, 151.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 197888/261597 [22:44<07:05, 149.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 198016/261597 [22:45<07:15, 145.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 198144/261597 [22:46<07:06, 148.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 198272/261597 [22:47<07:20, 143.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 198400/261597 [22:48<07:15, 145.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 198528/261597 [22:49<07:22, 142.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 198656/261597 [22:49<07:15, 144.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 198784/261597 [22:50<07:19, 142.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 198912/261597 [22:51<07:34, 137.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 199040/261597 [22:52<07:27, 139.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 199168/261597 [22:53<07:08, 145.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 199296/261597 [22:54<07:19, 141.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▌  | 199424/261597 [22:55<07:07, 145.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▋  | 199552/261597 [22:56<06:58, 148.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▋  | 199680/261597 [22:56<06:56, 148.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▋  | 199808/261597 [22:57<07:13, 142.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▋  | 199936/261597 [22:58<07:06, 144.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  76%|███████▋  | 200064/261597 [22:59<07:00, 146.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 200192/261597 [23:00<06:53, 148.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 200320/261597 [23:01<07:00, 145.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 200448/261597 [23:02<07:00, 145.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 200576/261597 [23:03<06:56, 146.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 200704/261597 [23:03<06:48, 149.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 200832/261597 [23:04<06:46, 149.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 200960/261597 [23:05<06:59, 144.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 201088/261597 [23:06<06:48, 148.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 201216/261597 [23:07<06:52, 146.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 201344/261597 [23:08<06:45, 148.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 201472/261597 [23:09<06:46, 147.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 201600/261597 [23:10<06:50, 146.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 201728/261597 [23:10<06:52, 145.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 201856/261597 [23:11<06:48, 146.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 201984/261597 [23:12<06:40, 149.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 202112/261597 [23:13<06:52, 144.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 202240/261597 [23:14<06:58, 141.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 202368/261597 [23:15<06:53, 143.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 202496/261597 [23:16<06:54, 142.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  77%|███████▋  | 202624/261597 [23:17<07:07, 138.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 202752/261597 [23:18<06:54, 141.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 202880/261597 [23:19<06:55, 141.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 203008/261597 [23:19<06:44, 144.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 203136/261597 [23:20<06:33, 148.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 203264/261597 [23:21<06:32, 148.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 203392/261597 [23:22<06:25, 151.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 203520/261597 [23:23<06:26, 150.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 203648/261597 [23:24<06:27, 149.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 203776/261597 [23:25<06:30, 147.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 203904/261597 [23:25<06:33, 146.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 204032/261597 [23:26<06:19, 151.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 204160/261597 [23:27<06:17, 152.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 204288/261597 [23:28<06:05, 156.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 204416/261597 [23:29<06:20, 150.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 204544/261597 [23:30<06:35, 144.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 204672/261597 [23:30<06:17, 150.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 204800/261597 [23:31<06:22, 148.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 204928/261597 [23:32<06:27, 146.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 205056/261597 [23:33<06:25, 146.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 205184/261597 [23:34<06:23, 147.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  78%|███████▊  | 205312/261597 [23:35<06:25, 145.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▊  | 205440/261597 [23:36<06:19, 147.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▊  | 205568/261597 [23:37<06:11, 151.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▊  | 205696/261597 [23:37<06:20, 147.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▊  | 205824/261597 [23:38<06:14, 148.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▊  | 205952/261597 [23:39<05:53, 157.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 206080/261597 [23:40<06:04, 152.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 206208/261597 [23:41<05:55, 155.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 206336/261597 [23:42<05:58, 154.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 206464/261597 [23:42<05:50, 157.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 206592/261597 [23:43<06:00, 152.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 206720/261597 [23:44<05:56, 153.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 206848/261597 [23:45<06:11, 147.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 206976/261597 [23:46<06:03, 150.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 207104/261597 [23:47<06:06, 148.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 207232/261597 [23:48<06:08, 147.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 207360/261597 [23:48<06:10, 146.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 207488/261597 [23:49<06:20, 142.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 207616/261597 [23:50<06:12, 144.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 207744/261597 [23:51<06:08, 146.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  79%|███████▉  | 207872/261597 [23:52<06:06, 146.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 208000/261597 [23:53<06:03, 147.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 208128/261597 [23:54<05:48, 153.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 208256/261597 [23:54<06:00, 147.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 208384/261597 [23:55<05:58, 148.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 208512/261597 [23:56<05:51, 150.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 208640/261597 [23:57<06:02, 146.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 208768/261597 [23:58<06:08, 143.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 208896/261597 [23:59<06:19, 138.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 209024/261597 [24:00<06:21, 137.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|███████▉  | 209152/261597 [24:01<06:21, 137.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 209280/261597 [24:02<06:29, 134.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 209408/261597 [24:03<06:34, 132.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 209536/261597 [24:04<06:22, 136.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 209664/261597 [24:05<06:12, 139.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 209792/261597 [24:05<06:02, 143.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 209920/261597 [24:06<05:51, 147.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 210048/261597 [24:07<05:54, 145.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 210176/261597 [24:08<05:45, 148.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 210304/261597 [24:09<05:48, 147.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 210432/261597 [24:10<05:31, 154.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  80%|████████  | 210560/261597 [24:10<05:32, 153.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 210688/261597 [24:11<05:37, 150.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 210816/261597 [24:12<05:38, 149.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 210944/261597 [24:13<05:33, 151.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 211072/261597 [24:14<05:43, 147.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 211200/261597 [24:15<05:28, 153.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 211328/261597 [24:16<05:26, 153.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 211456/261597 [24:16<05:21, 155.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 211584/261597 [24:17<05:17, 157.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 211712/261597 [24:18<05:28, 151.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 211840/261597 [24:19<05:18, 156.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 211968/261597 [24:20<05:25, 152.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 212096/261597 [24:21<05:26, 151.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 212224/261597 [24:22<05:35, 147.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 212352/261597 [24:22<05:24, 151.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████  | 212480/261597 [24:23<05:24, 151.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████▏ | 212608/261597 [24:24<05:36, 145.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████▏ | 212736/261597 [24:25<05:22, 151.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████▏ | 212864/261597 [24:26<05:27, 148.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████▏ | 212992/261597 [24:27<05:28, 148.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  81%|████████▏ | 213120/261597 [24:28<05:40, 142.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 213248/261597 [24:29<05:38, 142.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 213376/261597 [24:30<05:54, 136.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 213504/261597 [24:31<05:53, 135.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 213632/261597 [24:31<05:41, 140.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 213760/261597 [24:32<05:28, 145.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 213888/261597 [24:33<05:32, 143.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 214016/261597 [24:34<05:36, 141.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 214144/261597 [24:35<05:36, 141.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 214272/261597 [24:36<05:35, 141.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 214400/261597 [24:37<05:33, 141.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 214528/261597 [24:38<05:21, 146.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 214656/261597 [24:38<05:18, 147.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 214784/261597 [24:39<05:08, 151.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 214912/261597 [24:40<05:06, 152.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 215040/261597 [24:41<05:03, 153.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 215168/261597 [24:42<04:58, 155.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 215296/261597 [24:42<04:59, 154.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 215424/261597 [24:43<05:06, 150.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 215552/261597 [24:44<05:12, 147.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 215680/261597 [24:45<05:11, 147.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  82%|████████▏ | 215808/261597 [24:46<05:09, 147.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 215936/261597 [24:47<05:06, 148.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 216064/261597 [24:48<05:12, 145.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 216192/261597 [24:49<05:13, 144.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 216320/261597 [24:50<05:07, 147.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 216448/261597 [24:50<05:04, 148.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 216576/261597 [24:51<04:56, 151.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 216704/261597 [24:52<04:54, 152.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 216832/261597 [24:53<04:58, 150.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 216960/261597 [24:54<04:55, 150.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 217088/261597 [24:55<05:07, 144.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 217216/261597 [24:55<04:54, 150.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 217344/261597 [24:56<04:55, 149.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 217472/261597 [24:57<04:54, 149.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 217600/261597 [24:58<04:49, 151.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 217728/261597 [24:59<04:44, 154.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 217856/261597 [25:00<04:56, 147.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 217984/261597 [25:01<04:56, 147.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 218112/261597 [25:01<04:48, 150.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 218240/261597 [25:02<04:45, 151.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  83%|████████▎ | 218368/261597 [25:03<04:43, 152.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▎ | 218496/261597 [25:04<04:42, 152.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▎ | 218624/261597 [25:05<04:53, 146.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▎ | 218752/261597 [25:06<04:56, 144.50it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▎ | 218880/261597 [25:07<04:51, 146.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▎ | 219008/261597 [25:07<04:46, 148.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 219136/261597 [25:08<04:36, 153.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 219264/261597 [25:09<04:38, 151.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 219392/261597 [25:10<04:50, 145.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 219520/261597 [25:11<04:48, 145.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 219648/261597 [25:12<04:48, 145.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 219776/261597 [25:13<04:39, 149.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 219904/261597 [25:13<04:33, 152.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 220032/261597 [25:14<04:30, 153.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 220160/261597 [25:15<04:41, 147.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 220288/261597 [25:16<04:41, 146.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 220416/261597 [25:17<04:54, 139.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 220544/261597 [25:18<04:55, 138.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 220672/261597 [25:19<05:00, 136.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 220800/261597 [25:20<04:47, 141.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  84%|████████▍ | 220928/261597 [25:21<04:48, 140.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 221056/261597 [25:22<04:42, 143.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 221184/261597 [25:22<04:39, 144.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 221312/261597 [25:23<04:35, 146.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 221440/261597 [25:24<04:38, 144.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 221568/261597 [25:25<04:35, 145.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 221696/261597 [25:26<04:29, 147.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 221824/261597 [25:27<04:24, 150.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 221952/261597 [25:28<04:27, 148.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 222080/261597 [25:28<04:26, 148.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 222208/261597 [25:29<04:22, 150.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▍ | 222336/261597 [25:30<04:22, 149.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 222464/261597 [25:31<04:35, 142.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 222592/261597 [25:32<04:36, 141.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 222720/261597 [25:33<04:29, 144.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 222848/261597 [25:34<04:24, 146.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 222976/261597 [25:35<04:30, 142.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 223104/261597 [25:36<04:25, 145.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 223232/261597 [25:36<04:26, 144.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 223360/261597 [25:37<04:21, 146.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 223488/261597 [25:38<04:27, 142.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  85%|████████▌ | 223616/261597 [25:39<04:21, 145.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 223744/261597 [25:40<04:16, 147.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 223872/261597 [25:41<04:15, 147.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 224000/261597 [25:42<04:13, 148.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 224128/261597 [25:43<04:12, 148.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 224256/261597 [25:43<04:03, 153.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 224384/261597 [25:44<03:56, 157.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 224512/261597 [25:45<03:58, 155.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 224640/261597 [25:46<04:05, 150.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 224768/261597 [25:47<03:59, 153.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 224896/261597 [25:47<04:00, 152.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 225024/261597 [25:48<04:01, 151.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 225152/261597 [25:49<04:01, 151.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 225280/261597 [25:50<03:58, 152.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 225408/261597 [25:51<04:11, 143.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▌ | 225536/261597 [25:52<04:14, 141.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▋ | 225664/261597 [25:53<04:13, 141.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▋ | 225792/261597 [25:54<04:05, 146.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▋ | 225920/261597 [25:55<04:04, 146.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▋ | 226048/261597 [25:55<04:07, 143.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  86%|████████▋ | 226176/261597 [25:56<04:00, 147.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 226304/261597 [25:57<03:56, 149.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 226432/261597 [25:58<03:52, 151.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 226560/261597 [25:59<03:46, 154.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 226688/261597 [26:00<03:52, 150.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 226816/261597 [26:00<03:49, 151.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 226944/261597 [26:01<03:56, 146.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 227072/261597 [26:02<04:09, 138.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 227200/261597 [26:03<04:18, 133.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 227328/261597 [26:04<04:06, 139.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 227456/261597 [26:05<04:05, 138.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 227584/261597 [26:06<03:54, 144.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 227712/261597 [26:07<03:55, 143.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 227840/261597 [26:08<03:54, 144.15it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 227968/261597 [26:09<03:56, 142.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 228096/261597 [26:10<03:51, 144.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 228224/261597 [26:11<03:57, 140.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 228352/261597 [26:12<04:05, 135.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 228480/261597 [26:13<04:02, 136.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 228608/261597 [26:13<03:58, 138.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 228736/261597 [26:14<03:56, 139.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  87%|████████▋ | 228864/261597 [26:15<03:54, 139.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 228992/261597 [26:16<03:52, 140.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 229120/261597 [26:17<03:44, 144.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 229248/261597 [26:18<03:39, 147.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 229376/261597 [26:19<03:40, 146.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 229504/261597 [26:20<03:36, 147.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 229632/261597 [26:20<03:42, 143.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 229760/261597 [26:21<03:42, 142.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 229888/261597 [26:22<03:36, 146.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 230016/261597 [26:23<03:39, 143.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 230144/261597 [26:24<03:34, 146.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 230272/261597 [26:25<03:31, 148.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 230400/261597 [26:26<03:30, 148.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 230528/261597 [26:26<03:26, 150.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 230656/261597 [26:27<03:31, 146.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 230784/261597 [26:28<03:43, 137.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 230912/261597 [26:29<03:38, 140.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 231040/261597 [26:30<03:33, 142.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 231168/261597 [26:31<03:26, 147.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 231296/261597 [26:32<03:26, 147.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  88%|████████▊ | 231424/261597 [26:33<03:22, 148.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▊ | 231552/261597 [26:34<03:29, 143.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▊ | 231680/261597 [26:35<03:26, 144.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▊ | 231808/261597 [26:35<03:24, 145.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▊ | 231936/261597 [26:36<03:26, 143.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▊ | 232064/261597 [26:37<03:18, 148.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 232192/261597 [26:38<03:14, 151.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 232320/261597 [26:39<03:18, 147.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 232448/261597 [26:40<03:17, 147.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 232576/261597 [26:41<03:18, 146.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 232704/261597 [26:42<03:19, 144.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 232832/261597 [26:42<03:18, 144.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 232960/261597 [26:43<03:19, 143.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 233088/261597 [26:44<03:18, 143.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 233216/261597 [26:45<03:18, 143.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 233344/261597 [26:46<03:17, 142.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 233472/261597 [26:47<03:14, 144.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 233600/261597 [26:48<03:15, 143.43it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 233728/261597 [26:49<03:09, 147.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 233856/261597 [26:49<03:10, 145.72it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 233984/261597 [26:50<03:06, 148.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  89%|████████▉ | 234112/261597 [26:51<03:07, 146.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 234240/261597 [26:52<03:11, 142.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 234368/261597 [26:53<03:05, 146.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 234496/261597 [26:54<02:57, 152.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 234624/261597 [26:55<02:53, 155.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 234752/261597 [26:55<02:52, 155.66it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 234880/261597 [26:56<02:56, 151.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 235008/261597 [26:57<02:53, 153.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 235136/261597 [26:58<02:50, 155.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 235264/261597 [26:59<02:49, 155.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|████████▉ | 235392/261597 [27:00<02:54, 150.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 235520/261597 [27:00<02:53, 149.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 235648/261597 [27:01<02:56, 146.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 235776/261597 [27:02<02:47, 154.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 235904/261597 [27:03<02:49, 151.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 236032/261597 [27:04<02:43, 156.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 236160/261597 [27:05<02:44, 154.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 236288/261597 [27:05<02:45, 153.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 236416/261597 [27:06<02:43, 154.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 236544/261597 [27:07<02:45, 151.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  90%|█████████ | 236672/261597 [27:08<02:43, 152.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 236800/261597 [27:09<02:45, 149.83it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 236928/261597 [27:10<02:43, 150.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 237056/261597 [27:11<02:49, 144.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 237184/261597 [27:12<02:47, 145.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 237312/261597 [27:12<02:46, 146.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 237440/261597 [27:13<02:42, 148.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 237568/261597 [27:14<02:50, 141.07it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 237696/261597 [27:15<02:48, 142.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 237824/261597 [27:16<02:47, 142.02it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 237952/261597 [27:17<02:48, 140.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 238080/261597 [27:18<02:47, 140.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 238208/261597 [27:19<02:40, 146.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 238336/261597 [27:20<02:41, 144.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 238464/261597 [27:21<02:42, 142.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████ | 238592/261597 [27:21<02:36, 146.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████▏| 238720/261597 [27:22<02:42, 140.37it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████▏| 238848/261597 [27:23<02:46, 136.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████▏| 238976/261597 [27:24<02:48, 134.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████▏| 239104/261597 [27:25<02:48, 133.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████▏| 239232/261597 [27:26<02:43, 136.63it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  91%|█████████▏| 239360/261597 [27:27<02:40, 138.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 239488/261597 [27:28<02:39, 138.22it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 239616/261597 [27:29<02:36, 140.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 239744/261597 [27:30<02:32, 143.11it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 239872/261597 [27:31<02:27, 146.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 240000/261597 [27:31<02:22, 151.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 240128/261597 [27:32<02:26, 146.18it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 240256/261597 [27:33<02:26, 146.17it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 240384/261597 [27:34<02:24, 146.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 240512/261597 [27:35<02:24, 146.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 240640/261597 [27:36<02:23, 145.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 240768/261597 [27:37<02:24, 143.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 240896/261597 [27:38<02:21, 146.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 241024/261597 [27:38<02:17, 149.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 241152/261597 [27:39<02:16, 149.95it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 241280/261597 [27:40<02:14, 150.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 241408/261597 [27:41<02:11, 153.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 241536/261597 [27:42<02:09, 154.80it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 241664/261597 [27:42<02:07, 156.53it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 241792/261597 [27:43<02:06, 156.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  92%|█████████▏| 241920/261597 [27:44<02:11, 149.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 242048/261597 [27:45<02:10, 150.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 242176/261597 [27:46<02:11, 147.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 242304/261597 [27:47<02:14, 143.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 242432/261597 [27:48<02:13, 143.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 242560/261597 [27:49<02:08, 147.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 242688/261597 [27:50<02:11, 143.60it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 242816/261597 [27:50<02:09, 144.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 242944/261597 [27:51<02:10, 143.46it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 243072/261597 [27:52<02:09, 143.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 243200/261597 [27:53<02:03, 148.42it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 243328/261597 [27:54<02:02, 149.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 243456/261597 [27:55<02:00, 150.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 243584/261597 [27:56<02:01, 147.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 243712/261597 [27:56<02:02, 146.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 243840/261597 [27:57<02:01, 146.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 243968/261597 [27:58<01:56, 150.68it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 244096/261597 [27:59<01:55, 151.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 244224/261597 [28:00<01:56, 149.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 244352/261597 [28:01<01:56, 147.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  93%|█████████▎| 244480/261597 [28:02<01:53, 150.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▎| 244608/261597 [28:02<01:51, 152.87it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▎| 244736/261597 [28:03<01:56, 145.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▎| 244864/261597 [28:04<01:55, 144.31it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▎| 244992/261597 [28:05<01:57, 141.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▎| 245120/261597 [28:06<01:56, 141.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 245248/261597 [28:07<01:50, 147.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 245376/261597 [28:08<01:50, 146.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 245504/261597 [28:09<01:46, 151.14it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 245632/261597 [28:09<01:48, 147.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 245760/261597 [28:10<01:48, 146.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 245888/261597 [28:11<01:50, 142.04it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 246016/261597 [28:12<01:49, 142.55it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 246144/261597 [28:13<01:45, 146.09it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 246272/261597 [28:14<01:45, 145.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 246400/261597 [28:15<01:42, 147.81it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 246528/261597 [28:16<01:41, 148.23it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 246656/261597 [28:17<01:41, 146.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 246784/261597 [28:17<01:41, 145.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 246912/261597 [28:18<01:37, 149.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 247040/261597 [28:19<01:39, 145.78it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  94%|█████████▍| 247168/261597 [28:20<01:40, 143.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 247296/261597 [28:21<01:37, 146.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 247424/261597 [28:22<01:34, 150.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 247552/261597 [28:23<01:33, 150.40it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 247680/261597 [28:24<01:37, 142.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 247808/261597 [28:24<01:37, 141.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 247936/261597 [28:25<01:35, 143.79it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 248064/261597 [28:26<01:36, 140.19it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 248192/261597 [28:27<01:38, 136.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 248320/261597 [28:28<01:38, 134.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▍| 248448/261597 [28:29<01:34, 139.05it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 248576/261597 [28:30<01:29, 145.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 248704/261597 [28:31<01:30, 142.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 248832/261597 [28:32<01:27, 145.21it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 248960/261597 [28:33<01:26, 145.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 249088/261597 [28:33<01:23, 149.48it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 249216/261597 [28:34<01:23, 148.82it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 249344/261597 [28:35<01:23, 146.49it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 249472/261597 [28:36<01:23, 145.20it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 249600/261597 [28:37<01:20, 148.58it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  95%|█████████▌| 249728/261597 [28:38<01:19, 149.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 249856/261597 [28:39<01:19, 148.52it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 249984/261597 [28:39<01:18, 147.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 250112/261597 [28:40<01:16, 149.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 250240/261597 [28:41<01:17, 146.75it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 250368/261597 [28:42<01:17, 144.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 250496/261597 [28:43<01:16, 145.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 250624/261597 [28:44<01:14, 147.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 250752/261597 [28:45<01:16, 142.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 250880/261597 [28:46<01:15, 142.10it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 251008/261597 [28:47<01:13, 144.90it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 251136/261597 [28:48<01:14, 140.65it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 251264/261597 [28:48<01:09, 147.85it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 251392/261597 [28:49<01:10, 144.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 251520/261597 [28:50<01:10, 143.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 251648/261597 [28:51<01:07, 147.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▌| 251776/261597 [28:52<01:05, 150.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▋| 251904/261597 [28:53<01:08, 141.38it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▋| 252032/261597 [28:54<01:09, 138.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▋| 252160/261597 [28:55<01:10, 133.16it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▋| 252288/261597 [28:56<01:09, 133.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  96%|█████████▋| 252416/261597 [28:57<01:09, 132.61it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 252544/261597 [28:58<01:07, 133.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 252672/261597 [28:59<01:04, 137.84it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 252800/261597 [28:59<01:01, 143.89it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 252928/261597 [29:00<00:58, 148.13it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 253056/261597 [29:01<00:58, 145.99it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 253184/261597 [29:02<00:58, 144.34it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 253312/261597 [29:03<00:55, 148.57it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 253440/261597 [29:04<00:56, 144.86it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 253568/261597 [29:05<00:56, 142.69it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 253696/261597 [29:06<00:55, 143.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 253824/261597 [29:06<00:53, 144.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 253952/261597 [29:07<00:54, 139.71it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 254080/261597 [29:08<00:55, 135.41it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 254208/261597 [29:09<00:55, 133.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 254336/261597 [29:10<00:54, 134.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 254464/261597 [29:11<00:51, 139.67it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 254592/261597 [29:12<00:52, 134.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 254720/261597 [29:13<00:51, 132.27it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 254848/261597 [29:14<00:48, 138.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  97%|█████████▋| 254976/261597 [29:15<00:45, 144.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 255104/261597 [29:16<00:44, 146.59it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 255232/261597 [29:17<00:43, 145.32it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 255360/261597 [29:17<00:42, 146.36it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 255488/261597 [29:18<00:40, 151.92it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 255616/261597 [29:19<00:38, 156.01it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 255744/261597 [29:20<00:38, 153.98it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 255872/261597 [29:21<00:37, 153.93it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 256000/261597 [29:22<00:37, 148.73it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 256128/261597 [29:22<00:37, 147.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 256256/261597 [29:23<00:36, 145.35it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 256384/261597 [29:24<00:36, 144.77it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 256512/261597 [29:25<00:34, 145.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 256640/261597 [29:26<00:33, 149.70it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 256768/261597 [29:27<00:31, 151.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 256896/261597 [29:28<00:31, 147.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 257024/261597 [29:28<00:30, 151.54it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 257152/261597 [29:29<00:29, 148.91it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 257280/261597 [29:30<00:28, 149.33it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 257408/261597 [29:31<00:28, 149.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 257536/261597 [29:32<00:26, 150.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  98%|█████████▊| 257664/261597 [29:33<00:27, 144.97it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▊| 257792/261597 [29:34<00:26, 141.39it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▊| 257920/261597 [29:35<00:26, 140.74it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▊| 258048/261597 [29:36<00:25, 138.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▊| 258176/261597 [29:37<00:25, 136.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▊| 258304/261597 [29:38<00:23, 139.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 258432/261597 [29:38<00:22, 143.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 258560/261597 [29:39<00:21, 139.08it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 258688/261597 [29:40<00:20, 139.30it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 258816/261597 [29:41<00:19, 141.88it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 258944/261597 [29:42<00:19, 136.28it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 259072/261597 [29:43<00:19, 131.51it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 259200/261597 [29:44<00:18, 132.44it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 259328/261597 [29:45<00:16, 134.76it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 259456/261597 [29:46<00:15, 139.56it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 259584/261597 [29:47<00:14, 142.25it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 259712/261597 [29:48<00:12, 145.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 259840/261597 [29:48<00:12, 144.29it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 259968/261597 [29:49<00:10, 148.64it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 260096/261597 [29:50<00:10, 145.03it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts:  99%|█████████▉| 260224/261597 [29:51<00:09, 146.24it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 260352/261597 [29:52<00:08, 141.62it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 260480/261597 [29:53<00:07, 142.26it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 260608/261597 [29:54<00:06, 143.47it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 260736/261597 [29:55<00:06, 142.96it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 260864/261597 [29:56<00:05, 142.06it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 260992/261597 [29:57<00:04, 136.00it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 261120/261597 [29:57<00:03, 142.12it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 261248/261597 [29:58<00:02, 138.45it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 261376/261597 [29:59<00:01, 141.94it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Encoding texts: 100%|█████████▉| 261504/261597 [30:00<00:00, 145.77it/s]

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Encoding texts: 100%|██████████| 261597/261597 [30:01<00:00, 145.22it/s]


In [None]:
import pandas as pd
import numpy as np
import torch
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

'''
    Input:
        Fine-tuned model
        public_test.csv
    Output:
        encoded_public_test_finetuned.json
'''

# Encode public_test
df = pd.read_csv('/kaggle/input/bkai-ai-track2-legal-document-retrieval/Legal Document Retrieval/public_test.csv')

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = SentenceTransformer('/kaggle/input/finetuned-biencoder/final', device=device)

def encode(lst = [], convert_to_tensor=True, batch_size=128):
    vectors = []
    with tqdm(total=len(lst), desc="Encoding questions") as pbar:
        for i in range(0, len(lst), batch_size):
            batch = lst[i:i + batch_size]
            encoded_batch = model.encode(batch, convert_to_tensor=True)
            if device == 'cuda':
                encoded_batch = encoded_batch.cpu()
            vectors.extend([np.array(arr) for arr in encoded_batch.numpy()])
            pbar.update(len(batch))
    return vectors

df['question_vector'] = encode(lst=list(df['question']))

output_df = df[['question', 'qid', 'question_vector']]

output_df.to_json('encoded_public_test_finetuned.json')

# Predict the 50 top relevant responses for public_test using the fine-tuned model
- Input: encoded_corpus_finetuned.json and encoded_public_test.json

In [2]:
import torch
import torch.nn.functional as F
import pandas as pd
import zipfile
import json
'''
    Input: encoded_corpus_finetuned.json and encoded_public_test.json
    Output: top 50 predictions
'''

train_df = pd.read_json('/kaggle/input/finetuned-corpus-and-test/encoded_public_test_finetuned/kaggle/working/encoded_public_test_finetuned.json')
corpus_df = pd.read_json('/kaggle/input/finetuned-corpus-and-test/encoded_corpus_finetuned/kaggle/working/encoded_corpus.json')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")


train_ids = train_df['qid'].tolist()
train_vectors = torch.tensor(train_df['vector'].tolist(), dtype=torch.float32).to(device)
corpus_ids = corpus_df['cid'].tolist()
try:
    corpus_vectors = torch.tensor(corpus_df['vector'].tolist(), dtype=torch.float32).to(device)
except:
    corpus_vectors = torch.tensor(corpus_df['question_vector'].tolist(), dtype=torch.float32).to(device)


train_vectors = F.normalize(train_vectors, p=2, dim=1)
corpus_vectors = F.normalize(corpus_vectors, p=2, dim=1)

similarity_matrix = torch.matmul(train_vectors, corpus_vectors.T)

top_k = 50  
top_k_values, top_k_indices = torch.topk(similarity_matrix, k=top_k, dim=1)

json_results = []

with open('predict_top50_finetuned.txt', 'w') as f:
    for i, (indices, scores) in enumerate(zip(top_k_indices, top_k_values)):
        qid = train_ids[i]
        
        top_cids = [str(corpus_ids[idx.item()]) for idx in indices]
        similarity_scores = [score.item() for score in scores]
        
        line = f"{qid} {' '.join(top_cids)}\n"
        f.write(line)
        
        json_entry = {
            "query_id": qid,
            "candidates": {
                "doc_ids": top_cids,
                "scores": similarity_scores,
            }
        }
        json_results.append(json_entry)

with open('predict_top50_finetuned.json', 'w', encoding='utf-8') as f:
    json.dump(json_results, f, ensure_ascii=False, indent=2)

with zipfile.ZipFile('predict_top50_finetuned.zip', 'w') as zipf:
    zipf.write('predict_top50_finetuned.txt')
    zipf.write('predict_top50_finetuned.json')

Using device: cuda


# Re-rank the top 50 prediction using cross-encoder

In [5]:
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import json

predictions = pd.read_json('/kaggle/input/top50finetuned/predict_top50_finetuned.json')
test_df = pd.read_csv('/kaggle/input/bkai-ai-track2-legal-document-retrieval/Legal Document Retrieval/public_test.csv')
corpus_df = pd.read_csv('/kaggle/input/preprocessed-corpus/preprocessed_corpus.csv')


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = AutoTokenizer.from_pretrained('namdp-ptit/ViRanker')
model = AutoModelForSequenceClassification.from_pretrained('namdp-ptit/ViRanker').to(device)
model.eval()

reranked_results = []

for _, row in predictions.iterrows():
    qid = row['query_id']  
    query_text = test_df['question'][test_df['qid'] == qid].iloc[0]
    doc_ids = row['candidates']['doc_ids']
    
    print(f"Processing qid: {qid}")
    print(f"First few doc_ids: {doc_ids[:3]}")
    
    pairs = []
    for doc_id in doc_ids:
        texts = corpus_df.loc[corpus_df['cid'] == doc_id, 'text'].tolist()
        for text in texts:
            pairs.append((query_text, text))

    batch_size = 8 
    all_scores = []

    with torch.no_grad():
        for i in range(0, len(pairs), batch_size):
            batch_pairs = pairs[i:i + batch_size]
            inputs = tokenizer(batch_pairs, padding=True, truncation=True, 
                            return_tensors='pt', max_length=512)
            inputs = {k: v.to(device) for k, v in inputs.items()}
            
            scores = model(**inputs, return_dict=True).logits.view(-1,).float()
            all_scores.extend(scores.cpu().numpy().tolist())

    json_entry = {
        "query_id": qid,
        "candidates": {
            "doc_ids": doc_ids,
            "scores": all_scores
        }
    }
    reranked_results.append(json_entry)

with open('predict_top50_reranked.json', 'w') as f:
    json.dump(reranked_results, f, indent=2)


Processing qid: 98440
First few doc_ids: ['77191', '230002', '111380']
Processing qid: 105737
First few doc_ids: ['51573', '229317', '470960']
Processing qid: 106239
First few doc_ids: ['214239', '101722', '184177']
Processing qid: 79491
First few doc_ids: ['50701', '64708', '64709']
Processing qid: 130557
First few doc_ids: ['36248', '3788', '193882']
Processing qid: 109476
First few doc_ids: ['139187', '557644', '572543']
Processing qid: 64585
First few doc_ids: ['136490', '35028', '256964']
Processing qid: 5785
First few doc_ids: ['108373', '63709', '65892']
Processing qid: 159576
First few doc_ids: ['141320', '126678', '162297']
Processing qid: 36563
First few doc_ids: ['534271', '214685', '593864']
Processing qid: 43191
First few doc_ids: ['71162', '167857', '71161']
Processing qid: 139105
First few doc_ids: ['25388', '73944', '25362']
Processing qid: 102773
First few doc_ids: ['88260', '72922', '210050']
Processing qid: 32598
First few doc_ids: ['90380', '140526', '236329']
Proce