In [None]:
!pip install webdataset
!pip install faiss-cpu

In [None]:
import tarfile
import json
import os
import torch
from datasets import Dataset
import webdataset as wds
import hashlib
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
dataset = wds.WebDataset("/kaggle/input/ragdata/shard_aa/shard_aa").decode()

for i, sample in enumerate(dataset):
    print("\nSample", i)
    print("Keys:", sample.keys())
    if "txt" in sample:
        print("Text Preview:", sample["txt"][:400])  # first 400 chars
    if "json" in sample:
        print("JSON Preview:", str(sample["json"])[:400])
    if i == 3:   # stop after a few
        break

In [None]:
shard_paths = [
    "/kaggle/input/ragdata/shard_aa/shard_aa",
    "//kaggle/input/ragdata/shard_ab/shard_ab",
    "//kaggle/input/ragdata/shard_ac/shard_ac",
    "//kaggle/input/ragdata/shard_ad/shard_ad",
    "//kaggle/input/ragdata/shard_ae/shard_ae",
]

dataset = wds.WebDataset(shard_paths).decode()

In [None]:
MODEL_NAME = 'all-MiniLM-L6-v2'
CHUNK_SIZE = 256  # Max tokens for the embedding model
CHUNK_OVERLAP = 32 

print(f"\nLoading sentence-transformer model: '{MODEL_NAME}'...")
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SentenceTransformer(MODEL_NAME, device=device)
print(f"Model loaded successfully on device: '{device}'")


# Initialize a text splitter to handle long documents.
# This ensures that no text passed to the model exceeds its token limit.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len, 
    is_separator_regex=False,
)

In [None]:
embedded_chunks = []

# Iterate through each sample in the dataset
for sample in dataset:
    key = sample.get('__key__', 'unknown_key')
    text_content = ""

    # --- a. Extract Text Content ---
    # Determine the source of text and extract it.
    if 'txt' in sample:
        text_content = sample['txt']
    elif 'json' in sample:
        json_data = sample['json']
        if 'search_results' in json_data and isinstance(json_data.get('search_results'), list):
            # Concatenate snippets from search results
            snippets = [result.get('snippet', '') for result in json_data['search_results']]
            text_content = ' '.join(filter(None, snippets)) # Filter out empty snippets
        else:
            # As a fallback, serialize the entire JSON object to a string
            text_content = json.dumps(json_data)
    
    if not text_content or not text_content.strip():
        print(f"  - WARNING: Skipping sample '{key}' because it has no text content.")
        continue

    # --- b. Chunk the Text ---
    # The splitter will return a list with one item if the text is short,
    # or multiple chunks if it's long.
    chunks = text_splitter.split_text(text_content)
    print(f"  - Processing sample '{key}': Extracted text split into {len(chunks)} chunk(s).")

    # --- c. Generate Embeddings for Each Chunk ---
    # model.encode can process a list of texts at once, which is very efficient.
    chunk_embeddings = model.encode(chunks, convert_to_tensor=False) # Get numpy arrays

    # --- d. Store the Results ---
    for i, chunk in enumerate(chunks):
        embedded_chunks.append({
            'original_key': key,      # The key of the source document
            'chunk_index': i,         # The index of this chunk within the document
            'chunk_text': chunk,      # The actual text of the chunk
            'embedding': chunk_embeddings[i] # The embedding for this chunk
        })


  - Processing sample 'final_train/10306552__OQ__O-2006-0005__EN_e41489': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/059343-5107_doublejeopardy_moviequotes_400_bc2cf9': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/29785339__WQA__E-2011-001330__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/086882-4594_doublejeopardy_formercapitals_800': Extracted text split into 126 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/13510333__WQ__E-2007-1033__EN_cc76eb': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8180472__QT__H-2004-0384__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/349151__QT__H-2002-0816__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/121038-4172_doublejeopardy_trickyquestions_400': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/7395271__WQ__E-1999-0248__EN_732ce1': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18105654__WQ__E-2008-1875__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33012447__WQ__E-2011-012613__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/001124-4085_doublejeopardy_enthebeginning_2000': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/028893-4105_jeopardy_mand_400': Extracted text split into 138 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/20163200__MOTION__P6-RC-2008-0571__EN': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17596386__WQ__E-2008-0748__EN_2c6c0c': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/144565-3904_doublejeopardy_moviesoftheyear_800': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/23048697__IM-PRESS__20090828-IPR-59848__EN_f5ac64': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22453621__IM-PRESS__20090430-IPR-54824__EN_fa9181': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23189077__COMPARL__2009-09-28-1__EN_562a30': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/196168-5003_doublejeopardy_memorablemovies_1600': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30888362__WQA__E-2011-003969__EN_8d1e46': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31240325__WQ__E-2011-007280__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/035053-5183_doublejeopardy_1980sbestsellers_800': Extracted text split into 59 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/6944198__TA__P5-TA-2002-0517__EN_69d852': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/098448-5190_jeopardy_shamrocks_1000': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32301000__WQ__E-2011-010155__EN_ccdec2': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16572433__IM-PRESS__20061026-STO-12284__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32435835__WQ__E-2011-010550__EN_d020b4': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/172878-3342_doublejeopardy_rivers_600': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/091473-3826_jeopardy_lances_300': Extracted text split into 137 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/22203056__WQA__E-2008-7023__EN_5fc753': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_57746': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/027369-2339_doublejeopardy_agriculture_600': Extracted text split into 149 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_65973': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21347701__IM-PRESS__20090209-IPR-48793__EN': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/093519-2827_doublejeopardy_food_200': Extracted text split into 134 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_1128': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20626220__IMP-CONTRIB__20081203-CAN-43487__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_36341': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/113765-3611_jeopardy_rocknpop_200_d5aa7c': Extracted text split into 151 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/20514011__WQA__E-2008-5312__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16048826__WQA__E-2007-3027__EN': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11401691__WQA__E-2004-2087__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/5650502__WQA__E-2003-0280__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/180273-970_doublejeopardy_literature_1000': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/28730799__WQ__E-2011-000623__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14774538__WQA__E-2007-1112__EN': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11478400__WQ__E-2006-1791__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32874292__WQA__E-2011-009988__EN_21a8f5': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/042084-2127_jeopardy_thecars_500_ddde18': Extracted text split into 70 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/121086-3248_jeopardy_rocksingers_400': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/26023208__WQA__E-2009-0138__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/008309-4946_jeopardy_demons_200': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11913226__WQ__E-2006-3587__EN_ce5654': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28697256__WQ__P-2011-000645__EN_fff9d6': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19430920__WQ__E-2008-4503__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_1923': Extracted text split into 45 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/177063-2571_doublejeopardy_ballet_1000': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15915677__WQ__E-2007-4855__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_1084': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25318729__IMP-CONTRIB__20100426-CAN-73499__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/173914-4593_jeopardy_statenicknamefun_400': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/172672-1883_jeopardy_sciencefiction_400': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18497756__WQA__E-2008-0550__EN_e6026e': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16354857__WQ__P-2007-5826__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24872811__WQ__P-2010-1382__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33086530__WQA__E-2011-010899__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32814995__WQ__E-2011-011745__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/090697-3373_jeopardy_authors_300': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/058348-4788_doublejeopardy_californiamissions_2000': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/170842-3347_doublejeopardy_bctimes_600': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/17473811__WQA__E-2007-5634__EN': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_86801': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12070967__WQA__P-2006-3657__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/185351-3082_doublejeopardy_worldgeography_800': Extracted text split into 130 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_81564': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/143395-1882_jeopardy_fileunderx_200': Extracted text split into 44 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/027724-2934_doublejeopardy_astronomy_400': Extracted text split into 124 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/17817520__TA__P6-TA-2008-0080__EN_da1eb8': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19827406__WQA__E-2008-4353__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25387498__WQ__E-2010-2909__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29307620__WQ__E-2011-001792__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17952826__WQA__E-2007-5501__EN_e0360a': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/165795-4673_doublejeopardy_soundslikeasimpsonscharacter_2000': Extracted text split into 78 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/12879546__WQ__E-2006-5830__EN_599900': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26936826__WQA__E-2010-5187__EN_320caf': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31452819__WQ__E-2011-007992__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/093949-2686_doublejeopardy_espionage_400': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/125515-3845_doublejeopardy_iamarock_400': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/034674-3217_jeopardy_shakespeareplainsimple_400': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/30087545__WQA__E-2011-000963__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_84742': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28225871__WQ__E-2010-010512__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17326142__IMP-CONTRIB__20080124-CAN-19625__EN_2cd96f': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/200938-2819_doublejeopardy_fooddrink_200_900df3': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/17245061__WQ__P-2008-0089__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/097549-3551_jeopardy_rank_200_74a7c4': Extracted text split into 162 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/191046-4858_jeopardy_aviationfirsts_400': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/122870-3601_finaljeopardy_90snotables_': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/112973-484_doublejeopardy_wwiii_1000': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/14421410__WQA__E-2007-0276__EN_a9bd34': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20399627__WQA__E-2008-5077__EN_8c9937': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23876405__WQ__E-2009-5769__EN_dc0ff3': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/155537-3722_jeopardy_howsoonweforget_300': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/17811850__MOTION__B6-2008-0124__EN_73e0f9': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/122819-3601_jeopardy_actorrific_200': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/23028336__WQ__E-2009-4142__EN_bd97fe': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/126734-3754_jeopardy_homophones_500_f3ef56': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/047280-4312_jeopardy_toughgeography_200': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/32658021__WQ__E-2011-011290__EN_84182b': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/145481-3333_jeopardy_peopleinentertainment_200': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/167037-4954_doublejeopardy_filmsofthe50s_3500': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/113898-3325_jeopardy_famousbeauties_500': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/27114230__WQA__E-2010-6189__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24649924__WQ__E-2010-0806__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15067334__WQA__E-2007-1573__EN_779063': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33127343__WQA__E-2011-011126__EN_0256d9': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/104895-3207_doublejeopardy_thecolon_600': Extracted text split into 138 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/26686518__WQ__E-2010-6814__EN_2bdeba': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_62': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/6266081__WQ__E-2004-0617__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16139061__WQ__E-2007-5237__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20644039__WQ__E-2008-6410__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32177270__WQ__E-2011-009657__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20230597__IMP-CONTRIB__20081027-CAL-40737__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/008575-62_doublejeopardy_bees_200_131482': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19837812__QT__H-2008-0754__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_17375': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25610024__WQA__E-2010-1331__EN_c20192': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/214739-2907_jeopardy_loseaturn_300_3574c4': Extracted text split into 145 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_35899': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/085082-3659_doublejeopardy_thehistoryoftheworldpart2_200': Extracted text split into 120 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_53376': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/038928-4613_jeopardy_yangtzedoodle_1000': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/13945158__WQ__E-2007-1814__EN_c36c8c': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18188832__WQ__E-2008-2116__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25789577__WQ__E-2010-3613__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33121942__WQ__E-2012-000486__EN_77f6eb': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21720378__WQ__E-2009-1451__EN_9439f2': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21937899__COMPARL__2009-04-02-1__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_79002': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/060423-3000_doublejeopardy_3000_600': Extracted text split into 127 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/19755449__WQA__E-2008-4484__EN': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_89343': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_66323': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30975709__WQ__E-2011-006132__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17739711__WQ__E-2008-0958__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8816128__WQA__P-2003-3831__EN_980740': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12016458__WQA__E-2006-0644__EN_e4df81': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12844010__WQ__E-2006-5448__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/187139-4651_doublejeopardy_ihearyoucalling_400': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/19065406__WQ__E-2008-3222__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33449930__WQ__E-2012-001697__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30072369__WQ__E-2011-003590__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12552905__WQ__E-2006-4661__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2304405__WQ__E-2003-1865__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25090697__IM-PRESS__20100406-STO-72100__EN_499dcc': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/091066-1175_jeopardy_singers_400_f5d339': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/10577035__WQ__E-2006-0302__EN_a443bb': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_32029': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/116151-3906_doublejeopardy_trustory_1000': Extracted text split into 151 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11443319__WQA__P-2004-2571__EN_7c73d3': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_74886': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27698217__WQ__E-2010-9373__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/154969-5196_doublejeopardy_youreasuperhero_1600_4d55d9': Extracted text split into 45 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/072267-1162_jeopardy_presidentialtrivia_400_3d4698': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/19432614__WQ__P-2008-4592__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22963340__WQA__E-2009-3085__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19827291__WQA__E-2008-3798__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_15520': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18188884__WQ__E-2008-2129__EN_297bc2': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/114005-3941_jeopardy_restaurateurs_200_997b4c': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/23389564__QT__H-2009-0368__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33740437__WQA__E-2012-001213__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/038521-3258_jeopardy_themaskofzorro_500': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/27214790__WQA__E-2010-6501__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22173613__WQA__E-2009-1510__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18215630__WQA__E-2007-6008__EN_ae70a6': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/159401-4659_jeopardy_shapeup_600_de1db0': Extracted text split into 60 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/9422575__WQ__E-2005-2739__EN_3e9893': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/181114-2901_doublejeopardy_potluck_800': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_12087': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29152854__WQ__E-2011-001389__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/083278-3595_jeopardy_peopleinsports_300': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17886556__QT__H-2008-0188__EN_ca848e': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23233066__IM-PRESS__20094000-AGD-00000__EN': Extracted text split into 110 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/142329-4891_jeopardy_timetoeat_200_1e2d12': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/16048786__WQA__E-2007-2872__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24614855__WQA__E-2009-5789__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23428493__TA__P7-TA-2009-0044__EN_e6b1b9': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11912853__WQ__E-2006-3485__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30955337__MOTION__B7-2011-0398__EN_8eac95': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/168959-3037_jeopardy_mindyourbusiness_400': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/080532-3548_doublejeopardy_presidentialhunks_200': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/027584-3738_jeopardy_applesoranges_300': Extracted text split into 116 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/26500158__WQ__E-2010-5910__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8741844__PRESS__BI-20050304-1__EN': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/117296-5118_jeopardy_namethedecade_400': Extracted text split into 92 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_7362': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/174455-4452_jeopardy_gimmeans_200': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12917659__WQA__E-2006-4388__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_41960': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/158880-2966_jeopardy_thebigapple_400': Extracted text split into 60 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/6302296__WQ__E-2004-0812__EN_4dbfdf': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_69518': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9539586__WQ__E-2005-3333__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23111135__WQ__E-2009-4274__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28638096__IM-PRESS__20110202-IPR-13044__EN': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_60194': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/046295-3004_jeopardy_oops_400': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/196199-280_jeopardy_wwiitrivia_400_e163ff': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/048668-3237_jeopardy_drnoguchi_200': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/052308-3401_jeopardy_uscities_100_3a9574': Extracted text split into 132 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/30493605__WQA__E-2011-002997__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/148543-2867_jeopardy_timesmanoftheyear_400': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_25271': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27955237__WQ__E-2010-9811__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/012015-4693_jeopardy_charactersfarewells_400_af2a63': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/215143-5167_jeopardy_youreinthiscountryif_200': Extracted text split into 123 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/22883369__IM-PRESS__20090710-STO-58043__EN_e8767a': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15897763__WQA__E-2007-4018__EN_c87aa9': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18433913__IM-PRESS__20080505-IPR-28210__EN': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/27537600__WQ__E-2010-8713__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30569948__WQ__E-2011-005353__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/037440-4165_jeopardy_numbwithnumbers_1000': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/036275-3063_jeopardy_historicdates_200': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/039120-5032_doublejeopardy_japanusrelations_800': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/072902-3798_doublejeopardy_organizedlabor_800': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/26812982__WQA__E-2010-5490__EN_f81eb2': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_55719': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11215082__WQA__E-2005-2539__EN_96fc43': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18074186__WQ__E-2008-1759__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/030891-4268_doublejeopardy_itbordersrussia_800': Extracted text split into 59 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/19851448__WQA__E-2008-4070__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/020476-3170_jeopardy_wildthings_500': Extracted text split into 138 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/32877083__WQ__E-2011-011967__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/166055-3279_jeopardy_worldcup98_300': Extracted text split into 80 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/137839-3368_doublejeopardy_descartes_400': Extracted text split into 46 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/189706-3650_doublejeopardy_20thcenturyeurope_600': Extracted text split into 46 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/33532180__WQA__P-2012-000848__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/063759-3816_jeopardy_historicoccasions_500': Extracted text split into 154 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/14176402__WQ__E-2007-2256__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/162136-4963_jeopardy_borntorun_800': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_32660': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/133306-3524_doublejeopardy_americanlit_600': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/11707097__WQA__E-2006-2039__EN_8ac034': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16902429__IM-PRESS__20071019-IPR-11909__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24071421__WQ__P-2009-6375__EN_3eb646': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/107196-1193_doublejeopardy_politicalquotes_1000': Extracted text split into 130 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/22650255__WQA__E-2009-1656__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_60820': Extracted text split into 39 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/037609-3729_jeopardy_statenicknames_300': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33478891__WQ__E-2012-001784__EN_fc12d0': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23303204__WQA__E-2009-3897__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17298385__WQ__E-2007-6540__EN_cc6066': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23734818__WQ__E-2009-5060__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24840680__IM-PRESS__20100305-STO-70031__EN': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6451539__MOTION__B5-1999-0185__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/025100-4903_doublejeopardy_gontomorrow_1600': Extracted text split into 138 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/132487-4600_doublejeopardy_thatsourisland_1200': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/075705-3513_jeopardy_maiden_500': Extracted text split into 107 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/029249-2019_jeopardy_sewing_100': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/052706-3259_doublejeopardy_thewanderyears_2000': Extracted text split into 137 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11876987__WQA__E-2004-3292__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30287840__TA__P7-TA-2011-0233__EN_f23eb8': Extracted text split into 45 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/067214-2930_jeopardy_rhymeswithjock_100': Extracted text split into 40 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/24902264__WQA__E-2009-6477__EN_0c0dc2': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26932883__WQ__E-2010-7154__EN_f8a863': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/106600-3116_doublejeopardy_fastfood_800': Extracted text split into 43 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/166466-4725_jeopardy_aheavenlycategory_400_e0c857': Extracted text split into 50 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/215594-3589_doublejeopardy_buyamericanlit_600': Extracted text split into 45 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/13994976__WQA__E-2006-5087__EN': Extracted text split into 36 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/26320268__TA__P7-TA-2010-0277__EN_0f475a': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/11150195__WQA__E-2005-3790__EN_93a844': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/180743-3415_doublejeopardy_humanitarians_600': Extracted text split into 128 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/061355-475_jeopardy_wines_300': Extracted text split into 134 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/6356265__MOTION__P5-RC-2003-0475__EN_c0def1': Extracted text split into 79 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/33526270__WQ__P-2012-002522__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13622054__QT__H-2007-0243__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_7569': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28872438__IM-PRESS__20110216-IPR-13778__EN_4de43c': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25182631__WQA__E-2010-0640__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/128922-2378_doublejeopardy_the7ancientwonders_1000': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/32630149__WQ__E-2011-011151__EN_5fd454': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16379228__WQA__E-2007-4041__EN_35fd24': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/152691-3615_jeopardy_winters_100': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/033223-2981_jeopardy_ores_100_3f4e36': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/12063388__WQ__E-2006-3871__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_4378': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/007231-2515_jeopardy_sports_100_dedd22': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/18414200__WQ__E-2008-2431__EN_4ae5d9': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_47956': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_2338': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28811016__QT__H-2011-000079__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20718144__IM-PRESS__20081208-BKG-44004__EN_5bb402': Extracted text split into 193 chunk(s).


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_48020': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/626064__RULES-EP__20031020__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/134945-4994_jeopardy_simpsonscharacters_200_f2540c': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/064060-4519_doublejeopardy_walterworld_2800': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/27077435__WQA__E-2010-6517__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_52311': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/189088-3129_jeopardy_inthekey_400': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/12658504__WQA__E-2006-3211__EN_10b993': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/004253-3403_jeopardy_bingeography_100_61592b': Extracted text split into 93 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/072290-1162_doublejeopardy_aroundthehouse_400': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/24066854__WQ__E-2009-6226__EN_76e2ad': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28482908__WQ__P-2010-011138__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20085036__COMPARL__2008-10-20-1__EN_7c0cec': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17139572__WQA__E-2007-5637__EN_945739': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25471379__TA__P7-TA-2010-0112__EN': Extracted text split into 76 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/046297-3004_jeopardy_landformations_500_79a71f': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_62323': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/126840-4809_jeopardy_musicmakers_400_9a1b25': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32901174__WQ__E-2011-012084__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15915613__WQ__E-2007-4826__EN_56ed1f': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18498322__WQA__E-2008-1145__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11529512__WQ__E-2006-2668__EN_48fac3': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16813438__IM-PRESS__20070618-IPR-07948__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26932635__WQ__E-2010-7253__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26898251__WQA__E-2010-5458__EN': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9705777__TA__P6-TA-2005-0400__EN_c53d48': Extracted text split into 232 chunk(s).


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

  - Processing sample 'final_train/19430604__WQ__E-2008-4397__EN_2dad1e': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18017388__WQ__E-2008-1638__EN_78bd17': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32997559__WQ__P-2011-012222__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30033821__OQ__O-2011-000104__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24082481__WQA__E-2009-5244__EN_5afe0d': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/204118-11_jeopardy_holidays_500': Extracted text split into 47 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/198615-2906_jeopardy_crosswordcluesa_100': Extracted text split into 81 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/169124-3366_jeopardy_mrmike_100_f865d6': Extracted text split into 61 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/069684-5193_doublejeopardy_worldofwater_1200': Extracted text split into 151 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/056965-4896_doublejeopardy_basslines_1600': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/080901-3690_doublejeopardy_foodphrases_400': Extracted text split into 44 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/22189733__WQ__E-2009-2421__EN_de45fb': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29307648__WQ__E-2011-001799__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/125951-4886_jeopardy_thelate40s_400': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/11164590__WQA__E-2005-1855__EN_bf4972': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/210040-4566_jeopardy_addaletter_1000': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/15972823__WQA__E-2007-4067__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/124335-3132_doublejeopardy_strifewithfather_600': Extracted text split into 75 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/023972-4664_doublejeopardy_alliteration_1200': Extracted text split into 112 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/7279861__TA__P5-TA-2000-0288__EN': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19345505__WQ__E-2008-4115__EN_a14c80': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/132615-2972_doublejeopardy_restaurantpotpourri_1000_4d9a51': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_4499': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/156576-2677_doublejeopardy_naturalists_400': Extracted text split into 64 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/163888-3044_doublejeopardy_actorsinpoliticalfilms_800': Extracted text split into 58 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/26420077__WQA__E-2010-0311__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/000282-4931_doublejeopardy_atthemall_1200': Extracted text split into 97 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_51606': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20904991__WQ__E-2008-6913__EN_71fac6': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_21308': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14326550__WQ__P-2007-2655__EN_9daa3f': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/159071-4763_jeopardy_nymdropper_1000': Extracted text split into 45 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_76966': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/052986-3550_doublejeopardy_secondladiesfirstnames_200': Extracted text split into 109 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_60706': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/071677-3873_jeopardy_geometry_400': Extracted text split into 99 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/14223275__QT__H-2007-0382__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9701840__QT__H-2005-0912__EN_b03f26': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_56779': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/079079-3202_jeopardy_presidentialtermsofservice_400': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/015502-3272_doublejeopardy_birthplaces_800': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_81332': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6398528__MOTION__B5-2003-0030__EN': Extracted text split into 75 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/16943393__REPORT__A6-2007-0499__EN': Extracted text split into 49 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/26077969__WQ__E-2010-4387__EN_6af2d3': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/086397-1837_jeopardy_tvnostalgia_500_012caa': Extracted text split into 145 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/24605480__WQ__E-2010-0546__EN_d2dcdf': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_87588': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/046343-14_jeopardy_sports_300': Extracted text split into 58 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/20824408__WQA__P-2008-6309__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/087170-3449_doublejeopardy_uspostalabbreviations_400': Extracted text split into 44 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/038785-4721_doublejeopardy_decadeofdiscovery_2000': Extracted text split into 39 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/24280794__QT__H-2010-0028__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_73389': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_12378': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31139488__WQ__E-2011-006753__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26120304__WQA__P-2010-3460__EN_e12f45': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/160477-2347_jeopardy_uscities_200': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_37354': Extracted text split into 43 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/7029649__TA__20011114__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/240399__RULES-EP__20020705__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/089920-3135_jeopardy_creeperscrawlers_500': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/26380488__IM-PRESS__20103000-AGD-00000__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8418125__WQ__E-2004-2981__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20843013__TA__P6-TA-2008-0634__EN': Extracted text split into 237 chunk(s).


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

  - Processing sample 'final_train/24232738__IMP-CONTRIB__20100121-CAN-67774__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/072022-3628_jeopardy_animals_100': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/5696162__WQ__E-2004-0389__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_89561': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/116926-1874_doublejeopardy_americanindians_800': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/20654248__REPORT__A6-2008-0470__EN_fdedc7': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/048258-3692_jeopardy_popgoestheballet_500': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29438981__WQA__P-2011-001069__EN_b1a0a5': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9867017__WQ__E-2004-3424__EN_067364': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/088033-3889_doublejeopardy_authorsonfilm_1000_df075e': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/176306-3380_jeopardy_thebritishinvasion_300': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/25752767__IMP-CONTRIB__20100527-CHE-75089__EN_bba3c8': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25843676__WQA__E-2010-2231__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/602959__AGENDA__20031022__EN_9579d2': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33277478__WQA__E-2011-011186__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29559729__WQA__E-2011-000062__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/7288303__TA__P5-TA-2000-0145__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33479756__WQ__E-2012-001883__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19017684__WQ__P-2008-3513__EN_706d96': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8258623__WQ__E-2004-1700__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/116165-4333_jeopardy_museums_400': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/045252-3172_doublejeopardy_meninblack_200': Extracted text split into 46 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/24123787__WQA__E-2009-5910__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/035062-5183_doublejeopardy_statesfreak_1200': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/21737977__TA__P6-TA-2009-0112__EN_853330': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21547061__REPORT__A6-2009-0046__EN_7c0236': Extracted text split into 537 chunk(s).


Batches:   0%|          | 0/17 [00:00<?, ?it/s]

  - Processing sample 'final_train/031957-2894_jeopardy_moviequotes_400': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_80827': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/170452-4729_jeopardy_9letterwords_600_0b269a': Extracted text split into 70 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/104192-2868_jeopardy_marchingband_200': Extracted text split into 86 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/26632517__WQ__E-2010-6555__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11034308__WQ__E-2006-1706__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31905132__WQ__E-2011-008931__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/096310-5187_jeopardy_placesonthemap_200': Extracted text split into 134 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11164240__WQA__E-2005-1842__EN_02ee93': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33710747__WQA__E-2012-000903__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27872318__WQA__E-2010-7677__EN_05d05d': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_44258': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/069597-5074_jeopardy_kidstough_800': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16463909__WQ__P-2007-5967__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25526781__WQ__E-2010-3131__EN_252c61': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13995781__WQA__E-2006-5663__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/138291-3304_jeopardy_boxofficebombs_300': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/12017966__WQA__E-2006-2964__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8816317__WQA__E-2004-0317__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32439633__WQA__E-2011-008870__EN_780e45': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/056143-4361_jeopardy_holdit_200': Extracted text split into 128 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/12059526__PV__20060926__EN': Extracted text split into 181 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/171110-2918_jeopardy_literature_200': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12015710__WQA__E-2006-0278__EN_979637': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_20031': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/19862930__COMPARL__2008-10-06-1__EN_84762e': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/131807-777_jeopardy_lifescience_500': Extracted text split into 51 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/27681054__IMP-CONTRIB__20101116-CNW-95192__EN_b591f6': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17127588__IMP-CONTRIB__20080107-CAN-17528__EN_4ee754': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27376762__IM-PRESS__20101020-IPR-89473__EN': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/123883-3072_jeopardy_travelindianapolis_100': Extracted text split into 64 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/31430221__WQA__E-2011-005793__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/181451-4195_jeopardy_specificgenerals_2000': Extracted text split into 76 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/192194-3583_doublejeopardy_talkingitalian_200': Extracted text split into 154 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_77869': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/188182-3604_jeopardy_thebillofrights_300': Extracted text split into 130 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/21098140__WQA__P-2008-6306__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18231966__WQA__E-2007-6165__EN_9b070e': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/187525-2624_jeopardy_television_200_8fe55c': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_28160': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/20391711__MOTION__B6-2008-0585__EN_d4ff1f': Extracted text split into 36 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/31198078__WQ__E-2011-006677__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12017130__WQA__E-2006-1917__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/162234-1655_finaljeopardy_actressestheirroles_': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/182164-2810_jeopardy_museums_200': Extracted text split into 133 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_27905': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/120022-3280_doublejeopardy_toughbeforeafter_400_a9a8fc': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/017587-731_doublejeopardy_artists_400': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/7163798__TA__20010405__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/308808__REPORT__A5-2002-0338__EN': Extracted text split into 187 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_46717': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21652262__WQA__P-2009-0670__EN': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/134626-3133_doublejeopardy_americanliterature_400': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/27698421__WQ__E-2010-9302__EN_8419ee': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6639029__TA__P5-TA-2004-0087__EN': Extracted text split into 1562 chunk(s).


Batches:   0%|          | 0/49 [00:00<?, ?it/s]

  - Processing sample 'final_train/446533__PRESS__NA-20030221-1__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/025799-2923_jeopardy_potluck_500': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/053140-4813_jeopardy_inspiredsongs_400': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_33637': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19360810__WQ__E-2008-4194__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26926547__IMP-CONTRIB__20100917-CAN-82755__EN_80090f': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_51689': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16145480__WQA__E-2007-3824__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_70972': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/6326319__MOTION__P5-RC-2004-0118__EN_fa21b2': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17865079__WQ__E-2008-1259__EN_86226b': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26463875__WQ__E-2010-5666__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33251333__WQA__E-2011-011689__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28558970__WQA__E-2010-9598__EN_e9e3d2': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12008368__WQ__P-2006-2580__EN_f82475': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22172655__WQA__E-2009-0551__EN_0fd78e': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_75824': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_89943': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32545075__IM-PRESS__20111130-IPR-32811__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/168312-2150_doublejeopardy_animalsinmythology_200_939b99': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/100819-3802_jeopardy_noway_400': Extracted text split into 126 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/133735-2978_doublejeopardy_rulers_1400': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/029207-2814_jeopardy_creatures_400': Extracted text split into 149 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/193488-4228_doublejeopardy_ballet_800': Extracted text split into 132 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_26140': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/170418-5208_doublejeopardy_guinnessworldrecords_1000': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15121183__WQ__P-2007-4012__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/010644-4935_doublejeopardy_ayoungpersonsguidetoart_2000': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/196959-1308_jeopardy_antiques_300': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/144643-500_jeopardy_daysinsong_1000': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/13179716__WQA__P-2006-5443__EN_74a360': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12407209__WQA__E-2006-3332__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19431012__WQ__E-2008-4541__EN_72c7d8': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22396302__TA__P6-TA-2009-0295__EN': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/23472140__WQ__E-2009-4741__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18443132__WQ__E-2008-2568__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/010307-3014_jeopardy_tvtwins_400': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/8066253__PRESS__DN-20040913-1__EN': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/124185-3493_jeopardy_hollywoodheartthrobs_200': Extracted text split into 49 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/213931-1873_doublejeopardy_jazz_800': Extracted text split into 110 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/065560-2581_doublejeopardy_zoology_200': Extracted text split into 100 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/035134-3771_doublejeopardy_beginsendswithn_1000': Extracted text split into 41 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/16932975__WQA__E-2007-5002__EN_1f7958': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12860558__WQ__E-2006-5645__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/209355-5163_jeopardy_popularculture_200': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/682896__PRESS__NR-20031203-1__EN': Extracted text split into 145 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/167673-4989_doublejeopardy_doubletalk_400': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/26510620__WQA__E-2010-4134__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32523378__WQ__E-2011-010908__EN_615c37': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30656167__WQ__E-2011-005388__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28602114__WQA__E-2010-9678__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14421442__WQA__E-2007-0344__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/5638654__WQ__P-2004-0392__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33288483__MOTION__B7-2012-0087__EN_3b2f73': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_56239': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30094515__IM-PRESS__20110502-IPR-18524__EN_295879': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/006181-4968_jeopardy_brando_400': Extracted text split into 85 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/056922-4896_jeopardy_itsaustraliamate_400': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/16279130__WQA__E-2007-4906__EN_c48512': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/184242-3584_jeopardy_irishstew_200_0ad53c': Extracted text split into 122 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_50835': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9645824__WQ__E-2005-3565__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6414923__MOTION__P5-RC-2002-0411__EN': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/103644-2635_doublejeopardy_ballet_1000': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/23507960__IMP-CONTRIB__20091027-CAN-63311__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29559917__WQA__E-2011-000299__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23433472__QT__H-2009-0382__EN_3018a8': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11440849__WQA__E-2004-2755__EN_227387': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24481274__OQ__O-2010-0016__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21245253__WQA__E-2008-6261__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/058564-446_doublejeopardy_futuristicfilms_200': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/28968554__WQ__E-2011-000938__EN_c38913': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/197226-3328_doublejeopardy_asilentp_600_c80eeb': Extracted text split into 155 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/009156-3907_jeopardy_smackdabinthemiddle_300': Extracted text split into 112 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/18291645__REPORT__A6-2008-0155__EN_e10aaf': Extracted text split into 303 chunk(s).


Batches:   0%|          | 0/10 [00:00<?, ?it/s]

  - Processing sample 'final_train/155903-3486_doublejeopardy_salutetheflag_200_e9e625': Extracted text split into 52 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/209204-4785_jeopardy_mirrors_1000': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/10849484__TA__20060406__EN_b2a917': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/070911-4850_jeopardy_vocabularytest_1600': Extracted text split into 131 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/142550-2669_doublejeopardy_libraries_600': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/167737-5078_doublejeopardy_thealiensarehere_800': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/146065-4592_doublejeopardy_authors_2000': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_1076': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32976286__WQA__E-2011-010402__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24644976__WQ__E-2010-0891__EN_c548fd': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32244554__WQA__E-2011-008262__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/110375-1152_doublejeopardy_languages_200': Extracted text split into 123 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_4459': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/036627-734_jeopardy_thethirdworld_100': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/12917895__WQA__E-2006-4705__EN_729300': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/212448-3369_jeopardy_regisphilbinsny_100': Extracted text split into 158 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/23485109__WQA__E-2009-4315__EN_a0a4af': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2305824__WQ__E-2003-2021__EN_527a93': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/094431-3868_doublejeopardy_thekingsofengland_400': Extracted text split into 145 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_5634': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/23560393__WQ__E-2009-5154__EN_6b6d27': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2304878__WQ__E-2003-1912__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9432430__WQ__E-2005-3154__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_27276': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/135479-4532_doublejeopardy_prehistorictimes_2000': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/141646-1427_doublejeopardy_philosophy_600': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/21425207__IM-PRESS__20090216-IPR-49532__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/166696-2357_doublejeopardy_collegesuniversities_1000': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/31625023__WQA__E-2011-006915__EN_965415': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29489208__WQA__E-2011-000300__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6282844__REPORT__A5-2004-0134__EN': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/25274559__IMP-CONTRIB__20100422-DAL-73204__EN_f27104': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26009636__WQA__E-2010-1920__EN_75c762': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17823629__WQ__E-2008-1060__EN_163ec7': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20513675__WQA__E-2008-4821__EN': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/10656777__WQ__E-2005-1027__EN_e7d7f1': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12763282__WQA__E-2006-4373__EN_c76f51': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_3677': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33189686__WQA__P-2011-012224__EN_379f90': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/059606-2622_jeopardy_californiacities_100': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_88790': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14886383__WQ__E-2007-3476__EN_cef05f': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33455496__MOTION__B7-2012-0121__EN_5512ff': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16393640__IM-PRESS__20051114-BKG-02297__EN': Extracted text split into 380 chunk(s).


Batches:   0%|          | 0/12 [00:00<?, ?it/s]

  - Processing sample 'final_train/18352206__WQA__E-2008-0127__EN_b2a5cd': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10767070__WQ__E-2005-0564__EN_03f9f4': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/114529-533_jeopardy_science_100': Extracted text split into 98 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/30087549__WQA__E-2011-000964__EN_bccc98': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_6300': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/215351-5135_doublejeopardy_femaleleaders_400': Extracted text split into 44 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/22954759__WQ__E-2009-3754__EN_da84e8': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27335440__WQ__E-2010-8374__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23921802__WQA__E-2009-4652__EN_a7e314': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/95910__REPORT__A5-2001-0364__EN_762779': Extracted text split into 805 chunk(s).


Batches:   0%|          | 0/26 [00:00<?, ?it/s]

  - Processing sample 'final_train/163984-2996_jeopardy_ohmi_500_8392fc': Extracted text split into 58 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/22601827__WQA__E-2009-1303__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/090044-3890_doublejeopardy_readersdigest_200': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6397989__MOTION__B5-2003-0140__EN': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/085092-3659_doublejeopardy_thehistoryoftheworldpart2_600': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/029118-3634_doublejeopardy_collegesuniversities_1000': Extracted text split into 153 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/17512693__WQ__P-2008-0607__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/048434-3581_jeopardy_ohwhatayear_400_a6f7ec': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/156303-3672_jeopardy_5letterwords_100': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/062657-2830_jeopardy_monkeybusiness_100': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/9585940__TA__P6-TA-2005-0382__EN_cc9116': Extracted text split into 108 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/146685-370_jeopardy_warstories_300_b15c21': Extracted text split into 55 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/203077-5178_jeopardy_burn_400': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/051269-3927_doublejeopardy_heybeautiful_600': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/10728862__WQ__E-2005-0920__EN_937142': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19026412__IM-PRESS__20080623-IPR-32472__EN_3bd4a3': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/185926-3039_jeopardy_biblicalpeople_500': Extracted text split into 131 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/052957-3550_jeopardy_oldtestamentpeople_100': Extracted text split into 98 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/104212-2868_jeopardy_birdsbees_500': Extracted text split into 134 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11790583__WQ__E-2006-3240__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32224985__MOTION__B7-2011-0577__EN': Extracted text split into 55 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/118165-4350_jeopardy_crackopenaushistorybook_1000': Extracted text split into 155 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/163380-4538_jeopardy_franklinbenfranklin_400_52c877': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/33710163__WQA__E-2012-000361__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19393235__WQ__P-2008-3989__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/091676-1659_doublejeopardy_thecongress_200': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/32491438__REPORT__A7-2011-0407__EN': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_3877': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/006174-4968_jeopardy_intothewoods_200': Extracted text split into 131 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_563': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/8210686__WQ__E-2004-2262__EN_61b210': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9308693__WQ__E-2005-2108__EN_ac8a89': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/134059-4743_jeopardy_reagan101_600': Extracted text split into 52 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/22134600__WQ__E-2009-2393__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27455740__WQ__E-2010-8545__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_8423': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_71824': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/168104-3249_doublejeopardy_johndenverlyrics_1000': Extracted text split into 48 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/11523821__WQ__E-2006-2741__EN_5eaf4d': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10510138__QT__H-2006-0190__EN_f74a9b': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_14342': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_53569': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_16039': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/131797-777_jeopardy_endsinable_300': Extracted text split into 56 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_21823': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24123365__WQA__E-2009-5377__EN_3c0328': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19558479__WQ__E-2008-4655__EN_89b204': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9391763__WQ__E-2005-2650__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/211721-2807_doublejeopardy_singers_800': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/17707117__IMP-CONTRIB__20080228-CHE-22502__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25412297__WQ__E-2010-2842__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10623839__QT__H-2006-0239__EN_bfdf47': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16978917__WQ__P-2007-6148__EN_0acdfd': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/093400-4865_doublejeopardy_ancienttimes_400': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/27331713__QT__H-2010-0537__EN_7b8ab3': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6406467__MOTION__B5-2002-0323__EN_00d247': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/26399454__WQ__E-2010-4971__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24873343__WQ__E-2010-1499__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19976633__IMP-CONTRIB__20081007-CAN-38924__EN_338259': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/108038-2514_jeopardy_rankstitles_200': Extracted text split into 130 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/33137277__IMP-CONTRIB__20120127-CAN-36348__EN_580b90': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_15562': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/130339-4791_jeopardy_likearollingstone_1000': Extracted text split into 40 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/041620-4512_doublejeopardy_theeagleshavelanded_1200': Extracted text split into 78 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_42702': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/127594-4181_doublejeopardy_crusader_400': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/24975452__WQ__E-2010-1718__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_76732': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_23915': Extracted text split into 82 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/31116231__WQA__E-2011-003510__EN_212f90': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_78850': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24110203__WQA__E-2009-5637__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_70158': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/015281-2685_jeopardy_oldbaseballteams_200': Extracted text split into 105 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_73371': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_41127': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/027019-2736_doublejeopardy_notablenames_800': Extracted text split into 132 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/14719022__WQA__E-2007-1700__EN_271f80': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/213449-5092_doublejeopardy_beastlyexpressions_1200_26ee9d': Extracted text split into 123 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/11655116__WQ__E-2006-2903__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26077973__WQ__E-2010-4388__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_13543': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9358595__WQ__E-2005-2308__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13332572__QT__H-2007-0195__EN_ccd7bb': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_41137': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_85326': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19098089__WQA__P-2008-2991__EN_50f80f': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/143596-2959_jeopardy_endsinll_500': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/23265716__WQA__E-2009-4163__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_44592': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/163873-3044_doublejeopardy_lobbyists_200': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/18750458__WQA__E-2008-2297__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28348453__WQA__E-2010-8596__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_72336': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33189242__WQA__E-2011-011544__EN_a46bd2': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12608747__WQ__P-2006-5060__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/019103-3322_jeopardy_aroundtheoffice_200': Extracted text split into 158 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/30980493__WQ__E-2011-006336__EN_a08679': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33441970__WQ__E-2012-001536__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26447851__WQ__E-2010-5198__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/116704-3744_jeopardy_thefirst_100': Extracted text split into 153 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/32612219__WQ__E-2011-011113__EN_6cb281': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/089942-3135_doublejeopardy_lazybones_800': Extracted text split into 47 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_57618': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14868567__QT__H-2007-0546__EN_b4a2c0': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/172836-3342_jeopardy_singitsister_100_865a16': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/30090767__WQ__E-2011-004111__EN_9a2a90': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24133081__IM-PRESS__20100111-IPR-67127__EN': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17473999__WQA__P-2007-6430__EN': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/055182-3327_jeopardy_bodiesofwater_500': Extracted text split into 116 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/071283-4366_doublejeopardy_rwanda_800_40e6b6': Extracted text split into 134 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_62868': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/174284-3180_jeopardy_indy500flags_300': Extracted text split into 80 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/143602-2959_doublejeopardy_shakespeareancharacters_200': Extracted text split into 43 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/12536244__WQA__E-2005-3831__EN': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19709170__WQ__E-2008-4900__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/197752-3086_jeopardy_january1948_500_c888ed': Extracted text split into 119 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/163241-3505_doublejeopardy_wildwildwest_800': Extracted text split into 41 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/010977-4362_doublejeopardy_pennames_400': Extracted text split into 85 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/13139802__WQ__E-2007-0555__EN_9b3d13': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13956527__TA__P6-TA-2007-0164__EN_b3cc03': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/20689921__WQA__P-2008-5668__EN_d1e96e': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33087026__WQA__E-2011-011793__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_64429': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/5651813__WQA__E-2003-2067__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_73428': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/143790-4310_doublejeopardy_nudistbuddhistorcubist_800': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/056778-3131_doublejeopardy_garywritesanovel_400': Extracted text split into 159 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/26898259__WQA__E-2010-5486__EN_ef6373': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_51341': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/106002-4641_doublejeopardy_graysanatomy_1000_9c10b3': Extracted text split into 119 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/168608-2932_doublejeopardy_comicactors_200': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/11665917__WQ__P-2006-3244__EN_7e812c': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11442440__WQA__E-2004-2877__EN_d29984': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20340664__REPORT__A6-2008-0393__EN_299ff0': Extracted text split into 180 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/16173314__WQ__E-2007-5370__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32876875__WQ__P-2011-011950__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/160435-4611_jeopardy_quasirelatedpairs_1000': Extracted text split into 54 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_47643': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30750221__WQA__E-2011-003043__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16989942__WQA__P-2007-5666__EN_bf0135': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9441042__WQ__E-2005-2898__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/124630-5104_doublejeopardy_thatoldtimereligion_1200': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/16979056__WQ__E-2007-6036__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/194557-4352_jeopardy_variety_1000_670773': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/11378229__WQA__E-2004-3187__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11529577__WQ__E-2006-2696__EN_15177c': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/207897-4331_doublejeopardy_communications_400': Extracted text split into 132 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/25149391__WQA__E-2010-0248__EN_435c6c': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/037412-4292_doublejeopardy_mumstheword_2000_a31493': Extracted text split into 57 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/19430568__WQ__E-2008-4388__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/020614-4880_jeopardy_saybon_1000': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/15530002__WQ__P-2007-4195__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8259733__WQ__E-2004-1838__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17101800__IMP-CONTRIB__20071220-CAN-16973__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13122501__WQ__P-2007-0836__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/181694-3024_jeopardy_mythslegends_500': Extracted text split into 118 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/127848-5030_doublejeopardy_tunnels_1200': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/26501677__WQA__E-2010-2553__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_77410': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17086411__IMP-CONTRIB__20071218-CHE-16235__EN_c5d541': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/097238-3500_doublejeopardy_choruslines_1000': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/168098-3249_doublejeopardy_worldofpoetry_800': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_11141': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19091630__QT__H-2008-0528__EN_f07d58': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15731562__WQ__P-2007-4520__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16449645__IM-PRESS__20060130-IPR-04826__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/211107-3888_jeopardy_survivalskills_400': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/023346-3775_jeopardy_climbit_400': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_82414': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22192447__WQ__E-2009-2493__EN_fdd66c': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28608881__WQ__E-2011-000419__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/075099-3247_jeopardy_the19aughts_300': Extracted text split into 107 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_57277': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25182451__WQA__E-2009-6208__EN_70c111': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30081044__WQ__E-2011-003889__EN_e64036': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/147824-3390_jeopardy_andwelikedit_500': Extracted text split into 70 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/122173-3307_jeopardy_wwii_500': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/047578-3164_finaljeopardy_sportsteams_': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/096898-3667_doublejeopardy_thelivingworld_1000': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/208129-4201_jeopardy_gameshows_1000_5274ac': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11438993__WQA__E-2004-2576__EN': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/10391547__MOTION__B6-2006-0104__EN_991609': Extracted text split into 40 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_80823': Extracted text split into 40 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/14414987__WQ__E-2007-2810__EN_fc47b5': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8210716__WQ__E-2004-2267__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15619038__WQA__E-2007-3338__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30992885__WQA__E-2011-004210__EN_6d2177': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16573130__WQA__E-2007-4164__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/141230-4638_doublejeopardy_countrymatters_1200': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/027606-3738_doublejeopardy_accustomedtopublicspeaking_400': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_19467': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/191828-4357_doublejeopardy_filmsofthe90s_400': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_46532': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10917480__WQ__E-2006-1126__EN_dce5a9': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_13116': Extracted text split into 48 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_7820': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/033860-3243_doublejeopardy_daysofwrath_400': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/24306722__WQ__P-2010-0086__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/144723-3201_doublejeopardy_ifitstuesday_200': Extracted text split into 70 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/26448631__WQ__E-2010-5479__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9562431__OQ__O-2005-0089__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/025821-2923_doublejeopardy_religion_800': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/30087301__WQA__E-2011-000613__EN_631bd6': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33492432__WQ__E-2012-002006__EN_d9fc27': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27179577__IM-PRESS__20101004-IPR-84975__EN_87636c': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_6576': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/180291-5087_jeopardy_saudiarabia_600_2d112c': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_52544': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_46638': Extracted text split into 52 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/067175-4691_jeopardy_ithappenedinthe70s_800': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/21737190__QT__H-2009-0150__EN_d6ec81': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/108094-4527_jeopardy_backinblack_200': Extracted text split into 127 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/158404-3077_jeopardy_novelcharacters_500': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/125359-3562_jeopardy_collegeteamnicknames_100': Extracted text split into 56 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/33709905__WQA__E-2012-000013__EN_1c1231': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/7135974__WQA__E-2003-2362__EN_73f863': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11706941__WQA__E-2006-0865__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22622280__WQA__E-2009-1380__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28682890__TA__P7-TA-2011-0038__EN_3e3004': Extracted text split into 61 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/19322392__WQ__E-2008-3951__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28591340__COMPARL__2011-02-07-1__EN_6e57ca': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/5713969__WQA__E-2003-2013__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24379202__WQ__E-2010-0302__EN_3b7880': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_13420': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19680019__WQA__E-2008-3068__EN': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25789501__WQ__E-2010-3550__EN_dad84b': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6842025__TA__P5-TA-2003-0292__EN': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/213118-3778_jeopardy_uscities_300': Extracted text split into 130 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/15618810__WQA__E-2007-2979__EN_c1ce38': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11151660__WQA__P-2005-3729__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19066502__WQ__E-2008-3539__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27969254__WQA__P-2010-9510__EN_809795': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/7348849__TA__P5-TA-1999-0079__EN_531763': Extracted text split into 56 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/32982786__WQ__E-2011-012250__EN_44418c': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_78663': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22086419__WQ__E-2009-1824__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11442510__WQA__E-2004-2889__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19322404__WQ__E-2008-3958__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_39569': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/065229-4179_jeopardy_beatleslyrics_200': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/31635612__WQ__E-2011-008217__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2308167__WQ__E-2003-2290__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/163246-3505_doublejeopardy_ftroop_800': Extracted text split into 59 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/30287736__TA__P7-TA-2011-0244__EN_570c94': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/2311775__WQ__E-2003-2750__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32529719__WQ__P-2011-011045__EN_d71698': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14157787__WQ__E-2007-2148__EN_4bf5be': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25157538__REPORT__A7-2010-0108__EN': Extracted text split into 279 chunk(s).


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

  - Processing sample 'final_train/11523905__WQ__E-2006-2766__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2303767__WQ__E-2003-1792__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/142148-4229_jeopardy_heythatrhymes_200_9931e5': Extracted text split into 41 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/12332286__OQ__O-2006-0124__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/082911-2838_jeopardy_20thcenturyinventions_200': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/557711__REPORT__A5-2003-0208__EN_a39ee7': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33442460__WQ__E-2012-001633__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_56456': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11724514__WQA__E-2006-1715__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29808784__TA__20110406__EN_2dfb2c': Extracted text split into 50 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/067439-3516_jeopardy_countrydivas_100': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/096406-3857_doublejeopardy_akittycategory_400_84ed99': Extracted text split into 133 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/15897699__WQA__E-2007-3897__EN_617688': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/006933-4667_jeopardy_radiopersonalities_400': Extracted text split into 93 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/135139-4322_jeopardy_ataleoftwocities_600': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_6252': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20972391__WQ__E-2008-6788__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24961803__WQ__E-2010-1667__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/076914-1634_doublejeopardy_worldgeography_400': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/159898-3046_doublejeopardy_moviebiographies_200': Extracted text split into 55 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/149442-4240_doublejeopardy_1950smoviehouse_800_9cbd52': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_89307': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31964035__IMP-CONTRIB__20111014-CAN-29247__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12018158__WQA__E-2006-3167__EN_91d1ce': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/027139-2988_doublejeopardy_television_800': Extracted text split into 51 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/067174-4691_jeopardy_apieceofthefraction_800': Extracted text split into 154 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/061487-5022_jeopardy_thesenatorofattention_1000': Extracted text split into 93 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/021686-5038_doublejeopardy_moviemobiles_400': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/26637097__WQ__E-2010-6777__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_6539': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33360382__WQ__E-2012-001381__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14712093__WQ__E-2007-3196__EN_d5168a': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28697200__WQ__P-2011-000762__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_71749': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/9688205__TA__P6-TA-2005-0395__EN': Extracted text split into 564 chunk(s).


Batches:   0%|          | 0/18 [00:00<?, ?it/s]

  - Processing sample 'final_train/121429-4889_jeopardy_streetsmarts_200': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/21490957__WQ__E-2009-0992__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_79220': Extracted text split into 45 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/082425-3824_jeopardy_johngrishamslawbooks_200': Extracted text split into 96 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/32480790__WQA__E-2011-009630__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_49819': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/024892-3334_jeopardy_jazznicknames_400': Extracted text split into 70 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/15111665__WQA__E-2007-2126__EN_c7da6c': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_55458': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_60608': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/127535-4540_doublejeopardy_usrivers_2000': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_20951': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18052069__IM-PRESS__20080311-IPR-23638__EN_2bbe8c': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15910474__WQ__E-2007-4952__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8902588__TA__P6-TA-2005-0106__EN': Extracted text split into 127 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/9037284__QT__H-2005-0424__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20959594__IMP-CONTRIB__20090109-CAN-45818__EN_7b8e5a': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17460011__WQ__E-2008-0252__EN_9b3fc8': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/456447__QT__H-2003-0119__EN_0faca0': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12273666__QT__H-2006-0906__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32874312__WQA__E-2011-010038__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/194963-4758_jeopardy_theresnobusiness_1000': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/089235-1486_doublejeopardy_worldcapitals_1000': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/100782-3670_doublejeopardy_inventorsinventions_600': Extracted text split into 89 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/12400331__WQ__E-2006-4436__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/121763-3721_doublejeopardy_sex_200': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_49692': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20101527__WQA__E-2008-4020__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32988699__WQ__E-2011-012412__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11829836__REPORT__A6-2005-0141__EN_929746': Extracted text split into 187 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_12852': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/123251-2352_jeopardy_poets_400': Extracted text split into 48 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/25164748__WQ__E-2010-2101__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30887854__WQA__E-2011-002290__EN_d845ad': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30001996__WQ__E-2011-003726__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6877058__WQA__E-2003-0686__EN_049e71': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/163876-3044_doublejeopardy_odetoengland_200': Extracted text split into 61 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/144609-1324_doublejeopardy_worldgeography_400': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/31277210__WQ__E-2011-007644__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/061952-3124_jeopardy_hannabarberacats_300': Extracted text split into 87 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/5714583__WQA__E-2003-2314__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/156442-2850_jeopardy_generalscience_500': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/5696142__WQ__E-2004-0383__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16600784__IM-PRESS__20060628-BRI-09328__EN_adb420': Extracted text split into 745 chunk(s).


Batches:   0%|          | 0/24 [00:00<?, ?it/s]

  - Processing sample 'final_train/033491-3833_doublejeopardy_bodyofliterature_200_a1879b': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21141767__WQ__E-2009-0135__EN_2abced': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18688878__WQ__E-2008-3028__EN_097918': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_78833': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/080675-4707_doublejeopardy_peninsulas_2600': Extracted text split into 92 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_76869': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_69368': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10356752__QT__H-2006-0057__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_18429': Extracted text split into 57 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/032666-3269_jeopardy_historicamericans_500': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/11846660__REPORT__A6-2005-0052__EN': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8169103__MOTION__B6-2004-0119__EN': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19188630__WQA__E-2008-2086__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20783779__TA__P6-TA-2008-0608__EN_f3948d': Extracted text split into 126 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/15662638__WQ__E-2007-4270__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/161226-4655_jeopardy_crosswordcluesd_1000_e95113': Extracted text split into 40 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/213091-4156_doublejeopardy_latriviata_1200': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/9441828__WQ__E-2005-2987__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12070745__WQA__E-2006-2879__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23659731__WQ__E-2009-5311__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33086434__WQA__E-2011-010524__EN': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_11098': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25182851__WQA__E-2010-1234__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/583594__PRESS__NA-20030829-1__EN': Extracted text split into 48 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_32204': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_48600': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_31371': Extracted text split into 39 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/022782-4622_doublejeopardy_discovery_2000': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11170627__WQA__E-2005-2800__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11706993__WQA__E-2006-1452__EN_012119': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10767230__WQ__E-2005-0582__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/041468-2836_jeopardy_5letterwords_300': Extracted text split into 46 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/14921659__WQA__E-2007-1612__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18216709__WQA__E-2008-0397__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/071575-3506_doublejeopardy_doctors_200': Extracted text split into 159 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_55499': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/114937-1257_doublejeopardy_magazines_800': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6426167__MOTION__B5-2001-0314__EN_03b3c3': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29907960__WQA__E-2011-002329__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26408598__WQ__E-2010-5108__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20514102__WQA__E-2008-5464__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19839475__WQ__E-2008-5057__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28599155__WQA__E-2010-010757__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18562904__WQA__E-2008-1538__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25274847__WQ__E-2010-2453__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/024485-3320_doublejeopardy_booksauthors_200': Extracted text split into 101 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/070523-3872_doublejeopardy_fillerup_1500': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/192577-4313_doublejeopardy_doggybag_2000_6e35a3': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/178922-4573_jeopardy_muchadoaboutnothing_200': Extracted text split into 57 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/135202-2841_jeopardy_overunder_300': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/145225-2862_doublejeopardy_1989books_800': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/17070911__IM-PRESS__20071211-IPR-14723__EN': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/070354-3745_doublejeopardy_rhymetime_800': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/31624731__WQA__E-2011-006196__EN_583722': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/085445-3040_doublejeopardy_namethatcountry_400': Extracted text split into 36 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/26637305__WQ__E-2010-6690__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25983500__WQ__E-2010-4099__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32189909__WQ__E-2011-009879__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17879437__TA__P6-TA-2008-0098__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12980818__TA__P6-TA-2007-0011__EN_fc9801': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/139702-2983_doublejeopardy_zoology_800_5ac6b2': Extracted text split into 75 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/163160-3531_jeopardy_englishships_400': Extracted text split into 57 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_28852': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_19167': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30087537__WQA__E-2011-000953__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33239889__WQ__E-2012-000843__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/200344-4976_doublejeopardy_letsgotoharvard_1200': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25273095__WQA__E-2010-1225__EN_71911e': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14176678__WQ__E-2007-2411__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_15909': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14334880__WQA__E-2007-0307__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/065808-4580_doublejeopardy_summerfun_800': Extracted text split into 154 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/25604690__WQ__E-2010-3446__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/092531-3435_jeopardy_idoknowjack_100': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/161135-1330_finaljeopardy_thesupremecourt_': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/184299-3687_jeopardy_olympicgoldmedalists_200': Extracted text split into 115 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/030289-3291_doublejeopardy_20thcenturyopera_400': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/068497-4342_jeopardy_er_200': Extracted text split into 136 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_46058': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26178643__WQA__E-2010-1443__EN_10a311': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_56974': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_29069': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/214112-1794_doublejeopardy_colorfulwordsphrases_800': Extracted text split into 149 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/077607-2995_doublejeopardy_theemmys_400_f3de35': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/30090751__WQ__E-2011-004093__EN_17d5c6': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/015596-5200_jeopardy_literaryobits_800': Extracted text split into 110 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/029651-4238_doublejeopardy_pennsylvania_1200': Extracted text split into 149 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_25335': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/14366192__QT__H-2007-0439__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22373220__IM-PRESS__20090420-FCS-53948__EN': Extracted text split into 83 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/26409605__WQ__E-2010-5159__EN_286066': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/184822-3695_doublejeopardy_deadlines_800': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6335827__MOTION__P5-RC-2004-0129__EN': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/33149894__IM-PRESS__20120124-NEW-36108__EN_092e49': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/056936-4896_jeopardy_rxmarksthespot_800': Extracted text split into 126 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/048247-3692_jeopardy_autumnatic_300': Extracted text split into 55 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_84105': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/085710-3914_doublejeopardy_howudoing_1000': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/009005-3393_doublejeopardy_roamintheworld_600_012261': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/165873-2854_jeopardy_oops_200': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/22473334__WQ__E-2009-3017__EN_23751e': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/061579-3048_doublejeopardy_actorsrole_1000': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/6452019__MOTION__B5-1999-0064__EN_a0f12d': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/158983-4571_jeopardy_musicalsettings_200': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/6265504__WQ__E-2004-0678__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/090489-4959_doublejeopardy_worldleaders_1600_129b51': Extracted text split into 130 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/6442628__MOTION__B5-2000-0360__EN_e4a9b6': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25718179__WQA__E-2009-6720__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_44938': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33292350__WQ__E-2012-001251__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22040314__WQ__E-2009-2004__EN_fa4850': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33360478__WQ__E-2012-001458__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_11126': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/7298436__TA__P5-TA-2000-0242__EN': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22537875__WQA__P-2009-2212__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26464055__WQ__E-2010-5761__EN_dc6649': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31582077__WQ__E-2011-008148__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/154892-4992_doublejeopardy_continentalgeography_800': Extracted text split into 58 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/14718954__WQA__E-2007-1577__EN_01e0bc': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/031440-2625_doublejeopardy_literature_400': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_14575': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27991624__WQA__E-2010-8271__EN_2b0df7': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/132224-4286_jeopardy_youcanquoteme_800': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/139293-4598_jeopardy_smalltownfolks_200': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/052430-1871_jeopardy_womeninsports_100': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_40903': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/162598-5206_jeopardy_literarygeography_400': Extracted text split into 56 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/27650590__WQ__E-2010-9234__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20120832__IMP-CONTRIB__20081016-INS-39646__EN': Extracted text split into 46 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/19665910__WQA__E-2008-4256__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/041293-1307_jeopardy_bluemovies_300': Extracted text split into 57 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/098535-3837_doublejeopardy_associations_800': Extracted text split into 138 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/6833639__TA__P5-TA-2003-0501__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11398983__WQA__E-2004-1786__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_16517': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/111486-4166_doublejeopardy_rhymetime_2000': Extracted text split into 82 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/17991677__WQ__E-2008-1497__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25610057__WQA__E-2010-1407__EN_e441d5': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_47507': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/059052-2582_doublejeopardy_thecivilwar_400': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/050181-4206_doublejeopardy_ohwhatayear_400_146540': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/24605492__WQ__E-2010-0549__EN_6e3978': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10743708__WQ__E-2005-0085__EN_219890': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11661945__WQA__E-2006-2098__EN_3785f6': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/037868-3422_doublejeopardy_screensirens_200_165e69': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/24644664__WQ__E-2010-0751__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28969334__WQ__E-2011-001108__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19755417__WQA__E-2008-4193__EN': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33677876__WQ__E-2012-002709__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29585320__WQA__E-2011-000585__EN_aad05e': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/155887-3486_jeopardy_holidays_300': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_24429': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14421694__WQA__E-2007-0532__EN_1f1b9b': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29756239__MOTION__B7-2011-0276__EN': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/183523-1265_finaljeopardy_americannovels_': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21193308__WQA__E-2008-6123__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/019658-4583_doublejeopardy_5inarow_400_4accb8': Extracted text split into 133 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/033701-4191_jeopardy_shakespeare_200': Extracted text split into 59 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/27653187__TA__P7-TA-2010-0395__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/191176-3084_jeopardy_theburbs_400': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/29734555__WQ__E-2011-003069__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26092496__WQ__E-2010-4254__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/046633-5058_jeopardy_ahorseisahorse_600': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/051695-3081_doublejeopardy_formercapitals_1000': Extracted text split into 95 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/33362557__WQA__E-2011-008730__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_11495': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/33442126__WQ__E-2012-001351__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16138873__WQ__E-2007-5182__EN_9b54e3': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20226473__IMP-CONTRIB__20081027-CAN-40671__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/151492-4589_jeopardy_fruit_200_d02d58': Extracted text split into 64 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/20149418__OQ__O-2008-0108__EN_bbe43f': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/018141-4866_finaljeopardy_notablenames_': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/057284-2885_jeopardy_people_200': Extracted text split into 40 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/15915989__WQ__E-2007-4981__EN_ba6a5e': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23028062__WQ__E-2009-4080__EN_0a86a5': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31333717__IMP-CONTRIB__20110825-CAN-25308__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/461411__QT__H-2003-0147__EN_4c48b4': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/124464-3585_doublejeopardy_scientists_800': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/009725-4738_doublejeopardy_rolloverbeethoven_800': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/051930-4895_doublejeopardy_shakespeare_1200': Extracted text split into 133 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/24166609__WQ__P-2010-0001__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23027765__WQ__E-2009-4038__EN_172a97': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_60224': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_8360': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30880871__WQ__E-2011-006081__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27276330__IMP-CONTRIB__20101014-CAL-87128__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/112583-3068_jeopardy_nonsensewords_400_5bb3d7': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/166828-3438_jeopardy_birdwordsphrases_300': Extracted text split into 76 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/28692726__WQ__E-2011-000445__EN_6e30e9': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/066121-3823_doublejeopardy_thecinema_800': Extracted text split into 136 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/33684137__WQA__E-2012-000574__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28173679__WQ__E-2010-010405__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9056212__QT__H-2005-0445__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/022886-2968_doublejeopardy_themovies_400': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/16147948__WQ__E-2007-5336__EN_657790': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_59304': Extracted text split into 39 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/15915741__WQ__E-2007-4922__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24071485__WQ__P-2009-6416__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/204174-4905_doublejeopardy_oncommissions_400': Extracted text split into 49 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/134828-4529_finaljeopardy_timemagazinesmanoftheyear_': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/143033-2578_jeopardy_science_100': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/6533101__TA__P5-TA-2000-0259__EN': Extracted text split into 131 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/9714895__WQ__E-2005-3669__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14335104__WQA__E-2007-1422__EN_edb0f0': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_62009': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/664307__QT__H-2003-0700__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/5714913__WQA__E-2003-2519__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/097184-4859_doublejeopardy_theresaway_800': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/27656956__WQA__E-2010-7696__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22370507__AGENDA__20090507__EN': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29567875__WQ__P-2011-002817__EN_615864': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29980649__IM-PRESS__20110418-IPR-18102__EN': Extracted text split into 51 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_19656': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31884533__WQ__E-2011-009003__EN_0d35b2': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6443262__MOTION__B5-2000-0107__EN_3b16a9': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2298828__WQ__E-2003-1218__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/060622-2340_jeopardy_aroundthekitchen_100': Extracted text split into 129 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/067073-1289_doublejeopardy_proverbs_400': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24872977__WQ__E-2010-1371__EN_724cde': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29726083__MOTION__B7-2011-0237__EN': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_38313': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_27191': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21566857__WQA__E-2009-0001__EN_f5c865': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10324799__PV__20060201__EN_d39c57': Extracted text split into 167 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/063775-3816_doublejeopardy_uhavetheright_200': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/449663__QT__H-2003-0102__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/216845-5195_doublejeopardy_geographicalbands_800_efe4f9': Extracted text split into 83 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/31225037__WQ__E-2011-007063__EN_3a1d1f': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/128217-3250_doublejeopardy_minesallmines_1000': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/077634-4319_jeopardy_theolympicgames_400': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_82662': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_53075': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17865349__WQ__E-2008-1322__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6378469__REPORT__A5-2004-0199__EN_10c557': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_49645': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/7135811__WQA__E-2003-1626__EN': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13798430__WQA__P-2007-0792__EN_84c8a4': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25404922__WQA__E-2010-1537__EN_ea8925': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/179912-2577_doublejeopardy_mythology_800': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/20228007__WQ__E-2008-5743__EN_6c75c4': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14335184__WQA__E-2007-2011__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/055089-4274_doublejeopardy_antimatter_400': Extracted text split into 149 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/26960261__IM-PRESS__20100920-IPR-82951__EN': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17823885__WQ__E-2008-1183__EN_182c7f': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16572998__WQA__E-2007-3489__EN_78ce79': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12918287__WQA__P-2006-5461__EN_f4207b': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/112700-3638_jeopardy_protozoa_500': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/306576__QT__H-2002-0694__EN_186333': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_16962': Extracted text split into 40 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/185540-4032_doublejeopardy_crosswordcluesl_2000': Extracted text split into 90 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_24696': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_60804': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/705264__REPORT__A5-2003-0412__EN': Extracted text split into 323 chunk(s).


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

  - Processing sample 'final_train/6407332__MOTION__B5-2002-0107__EN_e0e381': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28465183__WQA__E-2010-9884__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/195087-3590_doublejeopardy_englishmonarchs_200': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_19080': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24872933__WQ__E-2010-1360__EN_dbc0c9': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/3584242__WQ__E-2003-3767__EN_bf9b88': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6944142__TA__P5-TA-2002-0509__EN_b68818': Extracted text split into 155 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_69263': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22584875__IM-PRESS__20090511-STO-55546__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/048589-3883_doublejeopardy_nationalbookawardwinners_1000': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_26856': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/021005-3647_doublejeopardy_beastlycommonbonds_1000': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/11439648__WQA__E-2004-2611__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/209752-3274_doublejeopardy_awhitmansampler_400': Extracted text split into 82 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/2313414__WQ__E-2003-2940__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_57974': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12140806__WQ__E-2006-4003__EN_997758': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/216751-5070_jeopardy_animation_200': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/8259113__WQ__E-2004-1757__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11842767__REPORT__A6-2005-0076__EN': Extracted text split into 291 chunk(s).


Batches:   0%|          | 0/10 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_78552': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25904512__WQA__E-2010-2044__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/166190-4456_doublejeopardy_inthebiginning_400': Extracted text split into 70 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_30018': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13146523__WQ__P-2007-0917__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_29331': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/154209-3571_jeopardy_theyrelyrical_200_c7a107': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/31243690__WQ__E-2011-007385__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/056287-4145_jeopardy_rodentmarsupialorprimate_1000_fe1223': Extracted text split into 58 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/27285244__WQ__E-2010-8216__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/038509-3258_jeopardy_countriesoftheworld_300': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/12917331__WQA__E-2006-3818__EN_7241ac': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/196342-4885_doublejeopardy_sciencenature_1600': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12859719__WQ__E-2006-5378__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/176794-2116_jeopardy_americana_400': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/9000858__TA__P6-TA-2005-0177__EN_9bfbdf': Extracted text split into 70 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/035963-4304_doublejeopardy_famousfilms_1600': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/23263121__IM-PRESS__20090928-IPR-61363__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32529811__WQ__E-2011-011011__EN_c734d2': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/065003-2977_jeopardy_americanhistory_300': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/180236-970_jeopardy_corporateamerica_400_fa0628': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/054266-5207_doublejeopardy_welcometoolemiss_2000': Extracted text split into 89 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/21245273__WQA__E-2008-6323__EN_ac4588': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8381817__WQ__E-2004-2761__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13236607__WQ__E-2007-0903__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2308651__WQ__E-2003-2353__EN_1e0306': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11916655__WQA__E-2006-0289__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20972443__WQ__E-2008-6808__EN_7df84a': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14421338__WQA__E-2007-0203__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/069599-5074_jeopardy_physed_800': Extracted text split into 153 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/13956393__TA__P6-TA-2007-0148__EN': Extracted text split into 382 chunk(s).


Batches:   0%|          | 0/12 [00:00<?, ?it/s]

  - Processing sample 'final_train/8605130__WQA__P-2004-0253__EN_d885ec': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25864639__WQ__E-2010-3919__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/118086-3977_jeopardy_magsformen_400': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/193948-4770_jeopardy_theborisyeltsinfile_1000': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/063235-2820_doublejeopardy_jazz_400': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/9224505__QT__H-2005-0595__EN_1e0c09': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31955490__WQ__E-2011-009132__EN_9272b5': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/060425-3000_doublejeopardy_looktothefuture_600': Extracted text split into 93 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_43128': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22040346__WQ__E-2009-2012__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/027942-2957_jeopardy_hitsofthe1890s_400': Extracted text split into 45 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/028364-4714_jeopardy_wevegotdesignsonbroadway_3600': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22033641__IMP-CONTRIB__20090401-CAN-53062__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18577008__WQA__E-2005-1428__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12063038__WQ__P-2006-3824__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26153388__WQ__E-2010-4712__EN_07c7de': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26007870__WQ__E-2010-4155__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33659982__WQ__E-2012-002394__EN_4d0de1': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/009404-2893_jeopardy_rhymetime_400': Extracted text split into 122 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/27650352__WQ__E-2010-9287__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19665634__WQA__E-2008-3644__EN_7be4a8': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/049144-5077_jeopardy_facesofamerica_400_d32cae': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_9263': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31258369__WQ__E-2011-007487__EN_b6c077': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/141026-4503_jeopardy_soundslikeanirishcounty_800': Extracted text split into 48 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_14605': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6878160__WQA__E-2003-3000__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6436711__QT__H-2004-0208__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18135915__WQ__E-2008-1950__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23190649__WQA__E-2009-3887__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/158037-3641_jeopardy_fileunderd_400': Extracted text split into 61 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/29153251__WQ__E-2011-001635__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/132280-2872_jeopardy_lastplace_300': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/013111-2835_doublejeopardy_parks_400': Extracted text split into 114 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/098577-5154_doublejeopardy_itsasmallworld_400': Extracted text split into 98 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_32193': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/33492368__WQ__E-2012-002150__EN_d55d58': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16482050__IM-PRESS__20060404-STO-07063__EN': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_76544': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_2139': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_37400': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26096491__IM-PRESS__20100621-IPR-76410__EN': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25138353__WQ__E-2010-2015__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/028038-4955_finaljeopardy_historicdocuments_': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11267120__WQA__E-2005-0689__EN_c3c725': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_14273': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/098543-3837_finaljeopardy_poets_': Extracted text split into 155 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_56487': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/038304-4499_doublejeopardy_teatime_1600': Extracted text split into 64 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/9759908__WQ__E-2005-3919__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_84140': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11203861__WQA__P-2005-3329__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/140157-482_jeopardy_gladtidings_400': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/26621114__WQ__E-2010-6396__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29559745__WQA__E-2011-000081__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25160459__WQA__E-2010-0836__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/211282-3359_jeopardy_sixflags_500': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_73226': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19267520__WQ__E-2008-3747__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14326562__WQ__P-2007-2690__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/137461-4969_jeopardy_thatsmybb_600': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/33084050__WQ__E-2012-000317__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/284971__REPORT__A5-2002-0291__EN': Extracted text split into 180 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/29559281__WQA__E-2010-010928__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/088011-3889_doublejeopardy_thisthat_200': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_46441': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/062685-2830_doublejeopardy_filmdirectors_200': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_69598': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11998970__WQ__E-2006-2095__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22203259__WQA__E-2009-0684__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31725972__WQA__E-2011-007344__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26448063__WQ__E-2010-5345__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/071653-2837_doublejeopardy_mansions_1000': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_11498': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/206794-3095_doublejeopardy_worldwarii_600': Extracted text split into 151 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/17353642__WQ__E-2008-0109__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26153408__WQ__E-2010-4724__EN_f53f4b': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27926334__WQ__E-2010-9945__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/199132-2050_doublejeopardy_gems_400_2f39e3': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/27266816__IMP-CONTRIB__20101013-CAN-87008__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/209753-3274_doublejeopardy_playingdoctor_400': Extracted text split into 151 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_2106': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/014355-3402_doublejeopardy_theuncategory_400': Extracted text split into 117 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/184599-3971_jeopardy_dc_200': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/169116-3664_doublejeopardy_commonbonds_1000': Extracted text split into 43 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/194420-2823_jeopardy_latinlegallingo_300': Extracted text split into 156 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/31673075__WQA__E-2011-004701__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27712038__WQA__E-2010-7764__EN_75dbf6': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9382588__WQ__E-2005-2505__EN_d43388': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/027482-3434_doublejeopardy_bemypal_200': Extracted text split into 155 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/118482-1884_doublejeopardy_historicgs_600_096558': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/2321367__WQ__P-2003-1397__EN_25ffeb': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/149113-3459_doublejeopardy_popularclassicalmusic_1000': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/13996777__WQA__P-2006-2500__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/093406-4865_doublejeopardy_ancienttimes_800': Extracted text split into 126 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/27088303__WQ__P-2010-7829__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23335885__COMPARL__2009-10-19-6__EN_b2f34b': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/067624-4543_jeopardy_theunitedstatesofadvertising_600': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/9330535__WQ__P-2005-2277__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13139430__WQ__E-2007-0272__EN_b91456': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_26129': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12952553__WQ__P-2007-0174__EN_91b585': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19546025__QT__H-2008-0678__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11912893__WQ__E-2006-3496__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_45612': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28225679__WQ__E-2010-010689__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/022056-3709_doublejeopardy_whowastheusveep_600': Extracted text split into 89 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/5698052__WQ__E-2004-0545__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/086080-2952_jeopardy_the1940s_200': Extracted text split into 133 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/9432350__WQ__E-2005-3041__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25276315__WQ__P-2010-2855__EN_c8a6dc': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10119874__WQ__E-2005-4639__EN_6f8e9b': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13179580__WQA__E-2006-5455__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22208035__WQ__E-2009-2635__EN_67905c': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/078638-4392_doublejeopardy_threescompany_2000': Extracted text split into 129 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/18273228__WQ__E-2008-2166__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/210155-3152_jeopardy_coaches_400': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/20597712__WQ__E-2008-6294__EN_66204b': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19367027__WQA__E-2008-2984__EN_a1772e': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/068102-4749_jeopardy_panamania_1000': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/178895-4288_doublejeopardy_celebrityrhymetime_400': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/030992-2049_jeopardy_usa_400': Extracted text split into 57 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/25206611__WQ__P-2010-2676__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_73380': Extracted text split into 39 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/057130-4794_doublejeopardy_artists_400': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/26442392__WQ__E-2010-5473__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/109710-3620_doublejeopardy_oldmovies_200': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/159054-4763_jeopardy_theyusedtobeincharge_600': Extracted text split into 39 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/168614-2932_doublejeopardy_comicactors_400': Extracted text split into 76 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/24641280__WQ__E-2010-0612__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22455829__WQA__E-2009-1500__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27285312__WQ__E-2010-8243__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_32379': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/211304-3359_doublejeopardy_thestars_800': Extracted text split into 137 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/14722153__WQ__E-2007-3124__EN_225bf9': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/181268-3726_jeopardy_historicpeople_500': Extracted text split into 134 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/31139636__WQ__E-2011-006843__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21052326__IM-PRESS__20090400-AGD-00000__EN': Extracted text split into 205 chunk(s).


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

  - Processing sample 'final_train/8886792__MOTION__B6-2005-0254__EN': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16641684__IM-PRESS__20061002-IPR-11249__EN': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28464979__WQA__E-2010-010006__EN_7fb01b': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25983056__WQ__E-2010-3985__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/532699__REPORT__A5-2003-0191__EN_82ac89': Extracted text split into 81 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_41294': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15848751__WQ__E-2007-4703__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26925378__WQ__P-2010-7423__EN_1c40bb': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18730266__QT__H-2008-0383__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/045795-4197_doublejeopardy_architecture_400_977f3a': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/32906278__WQA__E-2011-009135__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/001236-1302_doublejeopardy_worldgeography_800': Extracted text split into 126 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/172875-3342_doublejeopardy_inthebeginning_400': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_31446': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19538852__COMPARL__2008-09-01-1__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_87865': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/205474-4080_jeopardy_yourfeminineside_800': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/30992729__WQA__E-2011-004101__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27588163__WQ__E-2010-8881__EN_de960d': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10014022__WQ__E-2005-4306__EN_ae2944': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26808293__WQ__P-2010-6947__EN_04dc7f': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_13341': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/071887-2921_doublejeopardy_historicplaces_600': Extracted text split into 132 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_19591': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8581895__WQA__E-2003-3542__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/203433-4800_jeopardy_presidentialcampaigns_200': Extracted text split into 128 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/24116869__WQA__P-2009-5932__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/051269-3927_doublejeopardy_heybeautiful_600_9480d0': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/097485-3649_jeopardy_historicamericans_200': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/29612225__WQA__E-2010-011149__EN_647697': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10588088__QT__H-2006-0221__EN_1b0e5c': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_17758': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/000262-4931_jeopardy_whatsinaname_1000': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/063414-3123_doublejeopardy_kiddylit_400': Extracted text split into 51 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_51037': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/2304009__WQ__E-2003-1819__EN_e179a9': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/098280-1170_doublejeopardy_russianhistory_400': Extracted text split into 145 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/17084118__IMP-CONTRIB__20071218-CAN-16181__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/216835-5195_jeopardy_africancities_1000': Extracted text split into 103 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/20512454__WQ__P-2008-6335__EN_fd588a': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26464091__WQ__E-2010-5771__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19703278__WQA__E-2008-3474__EN_b2c1c3': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/109611-4716_doublejeopardy_mammals_2000_57cf8e': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/23484861__WQA__E-2009-3782__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_80111': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23966785__IMP-CONTRIB__20091211-CAN-66358__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/028038-4955_finaljeopardy_historicdocuments__38ef02': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22203425__WQA__E-2009-0843__EN_688ac0': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_78138': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25892377__WQ__P-2010-4114__EN_331177': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/190631-3229_jeopardy_toserve_200': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_71893': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/23922030__WQA__E-2009-4861__EN_63c6cb': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/069056-1984_jeopardy_statemottoes_700_795df6': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/047106-3287_jeopardy_funwithacronyms_100_46335e': Extracted text split into 41 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/16849957__IM-PRESS__20070710-IPR-09040__EN_32d1b2': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/036712-2579_jeopardy_famousfranks_500': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/12843902__WQ__E-2006-5411__EN_2b8dd0': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/193230-3057_jeopardy_aloafofbread_300': Extracted text split into 126 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/142654-4351_jeopardy_atriptothetoweroflondon_1000': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/33492640__WQ__E-2012-002042__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30993153__WQA__E-2011-004376__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30993449__WQA__E-2011-004673__EN_bc9929': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_46238': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/15812861__WQA__E-2007-2955__EN': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_64516': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/051463-4706_doublejeopardy_worldtravel_2000': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/24961875__WQ__E-2010-1691__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/196122-5003_jeopardy_americana_200': Extracted text split into 127 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/2311929__WQ__E-2003-2770__EN_df852c': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/128350-2779_jeopardy_heraldry_200': Extracted text split into 70 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/12231031__WQ__E-2006-4369__EN_b28da1': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23083317__WQA__E-2009-4052__EN_1fc2fa': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12063380__WQ__E-2006-3869__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29563306__WQ__E-2011-002563__EN_05513f': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32717726__MOTION__B7-2011-0700__EN': Extracted text split into 61 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_76272': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33257141__IMP-CONTRIB__20120210-CAN-37781__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15731674__WQ__E-2007-4397__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27062741__WQ__E-2010-7454__EN_cb9bd4': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25939908__IM-PRESS__20100614-IPR-76042__EN_25d1ba': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33532106__WQA__P-2012-000408__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21819226__WQA__E-2008-6830__EN_58cc05': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23918200__WQ__E-2009-6010__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_71344': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/204047-3535_jeopardy_wegetletters_300': Extracted text split into 145 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/085689-3914_doublejeopardy_colorfulgeography_400': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/23823376__IM-PRESS__20091120-STO-64906__EN': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/029847-2931_jeopardy_sportsterms_200_451b89': Extracted text split into 93 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/31539332__TA__P7-TA-2011-0385__EN': Extracted text split into 123 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/29453394__WQ__P-2011-002470__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_78501': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/100754-3670_jeopardy_ancientgreece_300': Extracted text split into 132 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_8963': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/30287769__TA__P7-TA-2011-0239__EN_8e9cf0': Extracted text split into 133 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/18562792__WQA__E-2008-1302__EN_fc5ec1': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_10943': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8579709__WQA__P-2003-2746__EN': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21397086__WQ__P-2009-0898__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23876839__WQ__E-2009-5969__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15982963__WQ__E-2007-4956__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17139544__WQA__E-2007-5425__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_446': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/020688-4981_doublejeopardy_richardwagner_800': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_80075': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_41256': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/124332-3132_doublejeopardy_rockstars_400_f2c9e2': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/159464-3311_jeopardy_classicmovielines_400': Extracted text split into 41 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/039172-838_jeopardy_theyearthatwas_500': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/26487759__WQ__E-2010-5897__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29233479__WQ__E-2011-001553__EN_a35e62': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32422819__REPORT__A7-2011-0387__EN': Extracted text split into 205 chunk(s).


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

  - Processing sample 'final_train/11378217__WQA__E-2004-3184__EN_a08775': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29907797__WQA__E-2011-001416__EN_e9238f': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/130428-5056_doublejeopardy_dickenscharacters_2000': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/19393563__WQ__E-2008-4338__EN_dda68a': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21944182__WQA__E-2009-0276__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19425611__WQA__E-2008-3505__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/125471-3899_doublejeopardy_60stv_1000': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/30372997__WQA__E-2011-002045__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/203166-5000_doublejeopardy_primeministerrhyme_400': Extracted text split into 128 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/6426368__MOTION__B5-2001-0347__EN': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26055111__IMP-CONTRIB__20100618-DLI-76336__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22814524__WQA__E-2009-2994__EN_833e51': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_59811': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/7528519__WQA__E-1999-0756__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28754001__MOTION__B7-2011-0118__EN': Extracted text split into 64 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/23761378__TA__P7-TA-2009-0079__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/080621-4707_jeopardy_academicpentathlon_200': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/22277517__MOTION__B6-2009-0251__EN': Extracted text split into 45 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/174045-2143_jeopardy_businessindustry_400': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/014852-4084_jeopardy_usbodiesofwater_200': Extracted text split into 117 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/031783-4305_jeopardy_anancientlifetimeago_800': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/11655000__WQ__E-2006-2865__EN_40714a': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/039457-1792_jeopardy_shopping_300': Extracted text split into 75 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/27712452__WQA__E-2010-8386__EN_643687': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11151462__WQA__E-2005-4442__EN_21b72c': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30880749__WQ__E-2011-006187__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12844034__WQ__E-2006-5454__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/014684-3846_jeopardy_uscoins_300': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11376675__WQA__E-2004-3008__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22851603__WQA__E-2009-3179__EN_e8be95': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9440978__WQ__E-2005-2889__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22622284__WQA__E-2009-1389__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/023351-3775_jeopardy_temperature_500': Extracted text split into 138 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_29652': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29550245__WQ__E-2011-002523__EN_fe2182': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27061390__IM-PRESS__20100923-FCS-83457__EN': Extracted text split into 258 chunk(s).


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

  - Processing sample 'final_train/048964-4956_jeopardy_science_1000': Extracted text split into 70 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/040918-4829_jeopardy_onpresidentbushsipod_200': Extracted text split into 60 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/13146663__WQ__E-2007-0412__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/019765-4925_jeopardy_pastpresentorfuture_800': Extracted text split into 136 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/19540503__IM-PRESS__20080901-IPR-35862__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/037612-3729_jeopardy_mountains_400': Extracted text split into 76 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/23303180__WQA__E-2009-3857__EN_969760': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_61700': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/205679-4192_doublejeopardy_everyonesacomedian_1200': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/038667-3239_jeopardy_nationalmonuments_100': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/18434101__REPORT__A6-2008-0156__EN': Extracted text split into 265 chunk(s).


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

  - Processing sample 'final_train/029813-4761_doublejeopardy_sciencenature_800_464b19': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/10767702__WQ__E-2005-0644__EN_b9c1e4': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/133355-3661_doublejeopardy_vivalarevolution_200_6e75a1': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_33209': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/022448-3220_jeopardy_whatsupduck_300_b9d59a': Extracted text split into 156 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/041607-4512_doublejeopardy_volcanoes_400': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/197725-5096_doublejeopardy_acropolisnow_5000': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/048121-5127_jeopardy_gotellitonthemountain_400': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/134958-4994_jeopardy_dough_600_7414f6': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/051546-4264_jeopardy_newsmakers_800_5847bb': Extracted text split into 157 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_380': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/052756-3563_doublejeopardy_mindyourpsqs_600': Extracted text split into 158 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/182439-3703_doublejeopardy_itbordersboth_1000': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/207687-5114_finaljeopardy_playcharacters_': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26077961__WQ__E-2010-4385__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10205529__MOTION__B6-2006-0065__EN_5f7cfb': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29457744__WQA__E-2010-011280__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26649594__WQA__E-2010-4989__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/095658-3674_jeopardy_killers_300': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/099771-4189_doublejeopardy_thetoweroflondon_400': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/33669492__IMP-CONTRIB__20120322-CAN-41616__EN_9f7f78': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/093425-4865_doublejeopardy_worldwariimovies_2000': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/17439597__QT__H-2008-0066__EN_221fd7': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19735629__WQ__E-2008-4908__EN_1c8b1e': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20331930__WQA__E-2008-4961__EN_419980': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9760192__WQ__E-2005-3960__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/181257-3726_jeopardy_thereelworld_300': Extracted text split into 55 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/161997-3691_jeopardy_isthatyourfinalanswer_100': Extracted text split into 64 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/24605544__WQ__E-2010-0562__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_9094': Extracted text split into 36 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/28889409__WQ__E-2011-000917__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/007171-3191_jeopardy_esotericknowledge_100': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11141755__WQA__P-2005-4683__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25489512__REPORT__A7-2010-0132__EN': Extracted text split into 286 chunk(s).


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_80097': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25789685__WQ__E-2010-3707__EN_d45a82': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/188950-3428_jeopardy_usotour_200': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/22650503__WQA__E-2009-2249__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_49348': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10913318__WQ__P-2006-1198__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18637807__WQA__E-2008-1553__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/283496__REPORT__A5-2002-0307__EN': Extracted text split into 121 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/002157-3112_jeopardy_action_200': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/198578-3928_jeopardy_tilldeathdouspartnot_500': Extracted text split into 47 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/058870-4087_doublejeopardy_4syllablewords_800': Extracted text split into 160 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/31430113__WQA__E-2011-005610__EN_6d9384': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22086783__WQ__E-2009-1927__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/054792-3752_doublejeopardy_chemistry_400': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/2307386__WQ__E-2003-2193__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/029649-4238_doublejeopardy_onewordortwo_1200': Extracted text split into 137 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_46083': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/32877215__WQ__E-2011-011915__EN_6454ba': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33707256__WQ__E-2012-002820__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/126583-3791_doublejeopardy_tropicana_800': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/29438677__WQA__E-2010-010071__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/125503-3845_jeopardy_namethehitmakers_500': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32822511__WQA__E-2011-010071__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27400947__WQA__E-2010-6842__EN_9cb3cd': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/133950-4964_jeopardy_slogansonthespot_1000': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/189941-4259_doublejeopardy_songsfromdisneyfilms_800': Extracted text split into 76 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/28255064__IMP-CONTRIB__20110106-CHE-11321__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/164721-3_doublejeopardy_worldoffood_1000_36cfb5': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8417825__WQ__E-2004-2939__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27401482__WQA__E-2010-7469__EN_c2b6c4': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10561349__QT__H-2006-0215__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/140355-4188_jeopardy_scoobydooat32_200': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/121667-1266_doublejeopardy_games_1000_b58f94': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33710452__WQA__E-2012-000626__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/132902-4922_doublejeopardy_itsnottv_800': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_80510': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/006449-3010_doublejeopardy_poorfamous_600': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/1832867__WQ__E-2002-0578__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12017198__WQA__E-2006-2333__EN': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/157484-3158_jeopardy_starwars_100': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/15170965__WQ__E-2007-3892__EN_859839': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/210971-3429_jeopardy_southoftheborder_200': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29402369__WQ__E-2011-001775__EN_c360c1': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16744250__IM-PRESS__20070309-IPR-04004__EN_fa0bc7': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/201381-3617_finaljeopardy_businessindustry_': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/065060-3902_jeopardy_elvishitsinotherwords_200_ee2d6a': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32603415__WQA__E-2011-009356__EN_a3ff4e': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/153870-4224_doublejeopardy_theliquorcabinet_400': Extracted text split into 64 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/15814455__QT__H-2007-0740__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_61925': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/192083-969_doublejeopardy_musicaleurope_600_8adff6': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/15576961__WQA__P-2007-3638__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30883777__MOTION__B7-2011-0380__EN': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/8725772__OQ__O-2005-0020__EN_99398f': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/102320-3398_doublejeopardy_swords_600': Extracted text split into 153 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/12016102__WQA__E-2006-0429__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/043172-2863_jeopardy_moststressfuljobs_500_a7a97e': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/12135272__QT__H-2006-0833__EN_be0a60': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/113657-1239_jeopardy_government_400': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/13544372__AGENDA__20070425__EN': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/019629-4583_jeopardy_presidentiallibraries_400': Extracted text split into 134 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/31130510__WQA__E-2011-005156__EN_39ee43': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29550233__WQ__E-2011-002472__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/006542-3358_jeopardy_insects_400': Extracted text split into 155 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/031611-2576_doublejeopardy_18thcenturyamericans_200': Extracted text split into 59 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/036177-3080_jeopardy_innout_500_5b6929': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/6397549__MOTION__B5-2003-0289__EN': Extracted text split into 117 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/101536-2808_doublejeopardy_thedreadedoperacategory_200': Extracted text split into 151 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/128595-2680_jeopardy_quotations_300': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/30717149__WQ__E-2011-005734__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31575117__WQ__E-2011-008079__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/047246-2908_jeopardy_countriesoftheworld_500': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/5696902__WQ__E-2003-3610__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11151234__WQA__E-2005-4281__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/136183-3446_doublejeopardy_reallygrimmfairytales_600': Extracted text split into 83 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/32614269__WQ__P-2011-011455__EN_6ce02b': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_42106': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/141962-4153_doublejeopardy_celeblls_2000': Extracted text split into 76 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/25315554__WQ__E-2010-2402__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/212443-3369_jeopardy_popmusic_100': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/075257-3598_doublejeopardy_18thcenturytheater_1000': Extracted text split into 133 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/061417-2914_jeopardy_frenchcuisine_300': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/32470418__WQ__E-2011-010622__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33019832__WQA__E-2011-011323__EN_c0e2db': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17203094__WQA__E-2007-5010__EN_562cd6': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26812890__WQA__E-2010-5175__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/199740-2851_doublejeopardy_1996_600': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/186871-4996_jeopardy_letusworship_200': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30581014__WQA__E-2011-003254__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_61187': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/185836-4847_finaljeopardy_classicliterature_': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9868266__WQ__E-2004-3604__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15171624__WQ__E-2007-3966__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26993323__WQA__E-2010-3955__EN_2516bc': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14421978__WQA__E-2007-0718__EN_7451d0': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_15287': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19680059__WQA__E-2008-3578__EN_af309a': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2309058__WQ__E-2003-2397__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25940231__MOTION__P7-RC-2010-0296__EN': Extracted text split into 87 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/043762-3790_jeopardy_theemmys_400_cba5ec': Extracted text split into 54 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/198419-4554_doublejeopardy_afewbarsofmusic_1200': Extracted text split into 137 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/070311-3745_jeopardy_thebritishinvasion_200': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11276012__WQA__E-2005-0085__EN_aaeecc': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13945190__WQ__E-2007-1824__EN_bbc239': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/000950-3619_jeopardy_weeds_100': Extracted text split into 115 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/21146139__WQA__E-2008-6585__EN_f39e6a': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/188554-3365_doublejeopardy_reallytoughcapitalcities_200': Extracted text split into 44 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/32417326__WQA__E-2011-009124__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32417074__WQA__E-2011-008704__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_65795': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21279956__WQ__E-2009-0306__EN_b1bda7': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_34583': Extracted text split into 39 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_71509': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_63649': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28039165__IMP-CONTRIB__20101213-CAN-08988__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_9659': Extracted text split into 43 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/5713851__WQA__E-2003-1908__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/078682-3071_doublejeopardy_composers_400': Extracted text split into 130 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_49004': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_74529': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_82210': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31673331__WQA__E-2011-006601__EN_92d613': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/071283-4366_doublejeopardy_rwanda_800': Extracted text split into 134 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/19656406__WQ__P-2008-4905__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24038561__WQ__E-2009-6169__EN_5171a1': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24924587__WQA__E-2009-6539__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/172840-3342_jeopardy_sabrinaswitchhandbook_100': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_62807': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_54601': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/132017-3876_doublejeopardy_statesmen_1000': Extracted text split into 43 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/17774718__WQA__P-2008-0433__EN_25214a': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/102557-3489_doublejeopardy_thisisplanetearth_600': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_73392': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/084378-4564_doublejeopardy_junebugged_800': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_11968': Extracted text split into 36 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/30476143__REPORT__A7-2011-0185__EN': Extracted text split into 116 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_88737': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15932382__REPORT__A6-2007-0393__EN': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/048338-3943_doublejeopardy_businessindustry_600': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/199465-3622_jeopardy_youngstars_200': Extracted text split into 137 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_87375': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/210715-3593_doublejeopardy_vocabulary_800': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/050404-3509_jeopardy_cardriver_400': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/004890-3331_doublejeopardy_dicerollnicknames_200': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/27926310__WQ__E-2010-9870__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20224146__IMP-CONTRIB__20081027-CAN-40669__EN_8ee1c4': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27586414__IMP-CONTRIB__20101110-CAN-93494__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10743480__WQ__E-2005-0829__EN_c6c1eb': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/119309-4915_doublejeopardy_oldhampshire_1200': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_80108': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_9993': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21279861__WQ__E-2009-0254__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/255367__QT__H-2002-0533__EN_7a9207': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31437877__WQ__E-2011-007899__EN_f93265': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31625219__WQA__P-2011-005636__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6682692__AGENDA__20040421__EN': Extracted text split into 89 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/129752-276_doublejeopardy_movietrivia_600': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/176758-3599_doublejeopardy_ruler_400': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/021975-2126_jeopardy_6letterwords_400': Extracted text split into 129 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/6935042__TA__P5-TA-2002-0613__EN': Extracted text split into 44 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/150037-1525_doublejeopardy_poetspoetry_1000': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/161722-4678_doublejeopardy_tobornottob_1200': Extracted text split into 60 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/8098853__WQ__E-2004-1636__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11169953__WQA__E-2005-2691__EN_ba4566': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_81259': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31914003__WQA__E-2011-007398__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30941693__WQA__E-2011-003489__EN_dc71f0': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/3580420__WQ__E-2004-0067__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/170172-3348_doublejeopardy_out_200': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/25958675__IMP-CONTRIB__20100616-CAN-76176__EN_00a29d': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/191092-4858_doublejeopardy_persona_2000': Extracted text split into 117 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/12015154__WQA__E-2006-0045__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31198018__WQ__E-2011-006717__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/080796-4267_doublejeopardy_getoutyourlibrarytablets_2000_f01bc0': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/27088511__WQ__E-2010-7639__EN_896966': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/193258-3057_doublejeopardy_wwii_600': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/26671863__IM-PRESS__20100902-IPR-80908__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/138315-3304_doublejeopardy_cartrouble_400': Extracted text split into 61 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/127828-5030_jeopardy_jepoetry_1000_aaf428': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20986883__WQ__E-2008-6795__EN_5a5788': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_35645': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32821557__WQA__E-2011-009383__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/072769-4340_doublejeopardy_california_800': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/25321930__WQA__E-2010-1082__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/743618__REPORT__A5-2004-0013__EN': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/9753268__QT__H-2005-0958__EN_7f4481': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_79520': Extracted text split into 39 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/23480979__WQ__E-2009-5019__EN_9be414': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25438257__WQ__E-2010-2988__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/100169-3301_jeopardy_onthemove_200_ce5865': Extracted text split into 123 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/22455609__WQA__E-2009-0573__EN_f7f44f': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24359854__WQA__E-2009-5662__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/537518__QT__H-2003-0368__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_32644': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/045822-4197_doublejeopardy_glassclass_2000': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/162126-4963_jeopardy_foodstuff_600': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/32822164__WQA__E-2011-009824__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31258309__WQ__E-2011-007542__EN_6600ad': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_62696': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22219978__OQ__O-2009-0073__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17175836__IMP-CONTRIB__20080114-CHE-18481__EN_e2c67b': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_37338': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/31827839__IM-PRESS__20114200-AGD-00000__EN_aad497': Extracted text split into 248 chunk(s).


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

  - Processing sample 'final_train/22222362__WQA__E-2009-1196__EN_c53545': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20885008__WQA__E-2008-2588__EN_c7ac1a': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16098649__REPORT__A6-2007-0414__EN': Extracted text split into 320 chunk(s).


Batches:   0%|          | 0/10 [00:00<?, ?it/s]

  - Processing sample 'final_train/6904811__TA__P5-TA-2004-0312__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_39696': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30190074__WQ__E-2011-004331__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/162242-3877_jeopardy_atthehotel_200': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/031203-1532_doublejeopardy_art_600': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/7053354__WQA__E-2002-0783__EN_2033dd': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/172032-2826_jeopardy_catchingsomezs_500': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/23183485__WQ__E-2009-4320__EN_823c5f': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/157736-2946_jeopardy_recurrences_400': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/207959-5049_doublejeopardy_britishpoets_800_403fb4': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/23477658__WQ__E-2009-4859__EN_7e2bc4': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6431998__MOTION__P5-RC-2001-0696__EN_c89421': Extracted text split into 75 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/33442332__WQ__E-2012-001668__EN_642624': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/037815-2961_doublejeopardy_filmsofthe70s_400': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/21107867__WQ__P-2009-0155__EN_c25aad': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6968082__TA__P5-TA-2002-0270__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/734100__REPORT__A5-2003-0486__EN_23a438': Extracted text split into 217 chunk(s).


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

  - Processing sample 'final_train/134498-3920_doublejeopardy_musicalflyers_200': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/125142-4391_jeopardy_number19inyourprograms_1000': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/067921-4719_jeopardy_islandhopping_1000_2cec40': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33271339__WQ__E-2012-001141__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17248459__WQA__P-2007-6200__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_68613': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16932261__WQA__E-2007-4395__EN_af4ca4': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2299983__WQ__E-2003-1351__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_63821': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/096585-3560_doublejeopardy_literarytrilogies_400': Extracted text split into 99 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/30919726__WQ__E-2011-006262__EN_cbff18': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/185433-4911_jeopardy_astrologytime_400': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/32453189__IMP-CONTRIB__20111123-CHE-32250__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33082296__WQ__E-2012-000138__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20399699__WQA__E-2008-5232__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/199442-4835_doublejeopardy_tvshowaddresses_1200': Extracted text split into 57 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/6669686__TA__P5-TA-2004-0234__EN_74244c': Extracted text split into 321 chunk(s).


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

  - Processing sample 'final_train/28604494__WQ__E-2011-000335__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/010728-4472_jeopardy_suitsmejustfine_800': Extracted text split into 108 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/27003300__IM-PRESS__20100923-IPR-83450__EN': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/179727-4792_doublejeopardy_missinglinks_1200': Extracted text split into 44 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/31741031__WQ__E-2011-008603__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17312730__IMP-CONTRIB__20080124-CPU-19539__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12381322__WQ__E-2006-4313__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/066431-2351_doublejeopardy_musicappreciation_1000': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/17952966__WQA__E-2007-6188__EN_895af2': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/7053264__WQA__E-2002-0719__EN': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/160328-1196_doublejeopardy_newspapers_400': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/2305604__WQ__E-2003-1998__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32194088__WQA__E-2011-008223__EN_929280': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_22668': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/016722-3018_jeopardy_theblues_300': Extracted text split into 117 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_14726': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21618293__WQA__E-2008-7112__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/084055-2882_jeopardy_uscities_500': Extracted text split into 134 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/116877-3174_doublejeopardy_moviestars_1000': Extracted text split into 98 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/032034-3399_doublejeopardy_katharinehepburnfilms_800': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21819342__WQA__E-2009-0243__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_37814': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/063001-4510_doublejeopardy_fooddrinkbrands_1200_0e362d': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25211942__WQA__E-2010-1197__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32877119__WQ__E-2011-011909__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_33462': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10969921__TA__P6-TA-2006-0162__EN': Extracted text split into 53 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_58835': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/079180-3885_jeopardy_europeangeography_100': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/178212-4744_jeopardy_usgeography_400': Extracted text split into 61 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/8878625__MOTION__B6-2005-0258__EN': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_80423': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/172540-2999_jeopardy_americanhodgepodge_200': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_78518': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33239997__WQ__E-2012-000916__EN_6cf412': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/194185-3680_jeopardy_between_400': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/32821603__WQA__E-2011-009417__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/181434-4195_jeopardy_annualevents_400': Extracted text split into 135 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/16573074__WQA__E-2007-3999__EN': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27275537__OQ__O-2010-0155__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28308063__WQ__E-2010-010875__EN_889873': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/039461-1792_jeopardy_filmsofthe90s_400': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/23905881__IM-PRESS__20091127-STO-65459__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8418025__WQ__E-2004-2967__EN_dce818': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22040142__WQ__P-2009-2358__EN_6b9a5d': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_77699': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11923231__MOTION__B6-2006-0496__EN_40343d': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9208225__TA__20050707__EN_73e505': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_23546': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_10119': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9327246__TA__20050908__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15543174__WQ__P-2007-4309__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30580714__WQA__E-2011-002608__EN_abb502': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24666947__WQ__E-2010-1016__EN_f84305': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15610243__REPORT__A6-2007-0311__EN': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/008321-4946_jeopardy_demons_600': Extracted text split into 91 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/28885578__WQA__E-2010-9515__EN_04a5b2': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23183433__WQ__E-2009-4306__EN_ca13e6': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/046303-3004_doublejeopardy_pioneertrails_200': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/8475770__WQA__E-2003-3320__EN': Extracted text split into 20 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/159393-4659_jeopardy_benfranklin_400': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27883826__IM-PRESS__20101129-IPR-02708__EN': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/148611-3799_doublejeopardy_thenewyorktimessciencetimes_200': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/23261542__WQ__E-2009-4493__EN_591f12': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/072957-3148_doublejeopardy_commonbonds_1000': Extracted text split into 84 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/174811-2896_jeopardy_agriculture_100': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/058809-2632_doublejeopardy_novelsnovelists_400': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/16031248__TA__P6-TA-2007-0447__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/039175-838_doublejeopardy_booksauthors_200': Extracted text split into 58 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/19155268__IMP-CONTRIB__20080707-CAN-33512__EN_51b985': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6451975__MOTION__B5-1999-0027__EN_3904f4': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/072677-2971_jeopardy_portraitsonsavingsbonds_200': Extracted text split into 154 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/199193-4853_doublejeopardy_occupationaltv_800': Extracted text split into 129 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/9358835__WQ__E-2005-2343__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20924801__WQA__E-2008-6145__EN_8bd157': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31755076__WQA__E-2011-007055__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/195644-4938_jeopardy_marvelousmarvel_200_2eee66': Extracted text split into 140 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/120803-1309_doublejeopardy_literature_400': Extracted text split into 54 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/27088535__WQ__E-2010-7553__EN_89d093': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/204968-4553_doublejeopardy_technology_2000': Extracted text split into 137 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/035914-4304_jeopardy_beatleshitsinotherwords_200': Extracted text split into 40 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_23888': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18575896__WQA__E-2004-1829__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_44174': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/171324-3261_doublejeopardy_television_600_669a1e': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/039226-4921_jeopardy_filmographies_1000': Extracted text split into 41 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/26520932__WQ__E-2010-6088__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/168879-4731_finaljeopardy_the20thcentury_': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/19945710__WQ__E-2008-5271__EN_525c46': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/114389-1102_doublejeopardy_fashion_400': Extracted text split into 154 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/066663-3021_doublejeopardy_ballet_800': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23571923__WQA__E-2009-4286__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/140644-2860_doublejeopardy_annualevents_800': Extracted text split into 75 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/058822-2632_doublejeopardy_revolutionarywarfigures_1000': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11682950__OQ__O-2006-0083__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22264003__PV__20090421__EN': Extracted text split into 30 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23556469__WQ__E-2009-5121__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_28209': Extracted text split into 26 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30712137__WQA__E-2011-003090__EN_732530': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/092162-3054_doublejeopardy_takingyourq_1000': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/8516009__QT__H-2004-0539__EN_cbf234': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/092821-1887_doublejeopardy_russianrulers_600': Extracted text split into 119 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/006323-3517_doublejeopardy_hometowns_400': Extracted text split into 46 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/27967318__OQ__O-2010-0200__EN_5ee8cb': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/154570-4273_jeopardy_basketballmovies_600': Extracted text split into 55 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/170857-3347_doublejeopardy_ghastlyoperaticdemises_1000': Extracted text split into 80 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/10767030__WQ__E-2005-0056__EN_b3bd61': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8577078__MOTION__B6-2005-0073__EN_fac56e': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/24605784__WQ__E-2010-0659__EN_6ae6e4': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/191003-3609_jeopardy_foreignnationalparks_500': Extracted text split into 57 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_4190': Extracted text split into 40 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_50995': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6427149__MOTION__B5-2001-0036__EN_c2352c': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/069307-3623_doublejeopardy_toughstuff_200_c9daf3': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_14380': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/104237-2868_doublejeopardy_blackamerica_800': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/071951-4890_doublejeopardy_entertainingjohns_1600': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/22300616__WQ__E-2009-2435__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10068369__WQ__E-2005-4498__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6424957__MOTION__B5-2001-0752__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22678468__IM-PRESS__20090607-STO-56909__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9441266__WQ__E-2005-2931__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10462919__WQ__P-2006-0256__EN_5468a4': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18419689__WQA__E-2008-0443__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26062810__WQ__E-2010-4160__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16173378__WQ__E-2007-5380__EN_6d81af': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25272991__WQA__E-2010-0280__EN_1d14b5': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_10258': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/021203-369_jeopardy_oddsends_400': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/12609203__WQ__E-2006-5082__EN_029564': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/068670-4653_doublejeopardy_ussities_2000': Extracted text split into 104 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_82075': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/174903-4807_doublejeopardy_gg_400_c6c6f7': Extracted text split into 64 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/052874-3252_doublejeopardy_80scinema_600': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/14689597__REPORT__A6-2007-0258__EN': Extracted text split into 44 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/32821041__WQA__E-2011-000125__EN_2bf87a': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32405393__WQ__E-2011-010260__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12944787__QT__H-2007-0028__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/093697-3716_jeopardy_fileunderm_500': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/6945800__TA__P5-TA-2002-0505__EN_2f8767': Extracted text split into 625 chunk(s).


Batches:   0%|          | 0/20 [00:00<?, ?it/s]

  - Processing sample 'final_train/136911-2940_jeopardy_literature_100': Extracted text split into 63 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/29569587__PV__20110323__EN_221c9e': Extracted text split into 187 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/24038505__WQ__E-2009-6140__EN_1ecf17': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30941804__WQA__E-2011-003651__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21775384__WQ__E-2009-1505__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17099707__WQA__E-2007-5366__EN_bab36f': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30941659__WQA__E-2011-003451__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/035944-4304_doublejeopardy_that70snews_400': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/30383482__WQ__E-2011-004793__EN_62f85e': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/110486-4831_jeopardy_thatsjustsad_800': Extracted text split into 137 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/14054157__WQA__P-2007-0755__EN_629ff9': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27582598__WQA__P-2010-8168__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/005490-2349_doublejeopardy_literature_400': Extracted text split into 51 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/10796802__WQ__P-2005-0356__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/195203-4882_jeopardy_loosechange_1000': Extracted text split into 138 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/018444-5052_jeopardy_leftovers_400': Extracted text split into 151 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/165319-3413_doublejeopardy_legendsofsports_1000': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/28206673__QT__H-2010-0655__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/109862-3851_jeopardy_irepresent_200_ce6201': Extracted text split into 51 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_82780': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19097728__WQA__E-2008-2168__EN': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/166164-4456_jeopardy_letterletter_200': Extracted text split into 47 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/098477-5190_doublejeopardy_jeopardyinpopculture_2000': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25526701__WQ__P-2010-3373__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32876883__WQ__P-2011-012029__EN_eec092': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_20526': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/146685-370_jeopardy_warstories_300': Extracted text split into 55 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/207420-4463_jeopardy_theworkingclass_800': Extracted text split into 149 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/33251702__WQA__E-2011-012003__EN_f4759d': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_23831': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/010747-4472_doublejeopardy_boxing_800': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/33711123__WQA__E-2012-001226__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/102517-3489_jeopardy_frogstuff_100': Extracted text split into 43 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/18168011__MOTION__B6-2008-0142__EN': Extracted text split into 53 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/072657-3805_doublejeopardy_supremecourtjustices_800': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/24638925__WQ__E-2010-0476__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/193223-3057_jeopardy_ajugofwine_200': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/27722571__QT__H-2010-0595__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11845710__REPORT__A6-2005-0062__EN': Extracted text split into 232 chunk(s).


Batches:   0%|          | 0/8 [00:00<?, ?it/s]

  - Processing sample 'final_train/10671989__WQ__P-2005-1184__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/18576576__WQA__E-2005-1298__EN': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30580762__WQA__E-2011-002703__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19942688__AGENDA__20081020__EN_968334': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/157429-3157_jeopardy_musicthekidslove_100': Extracted text split into 61 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/109763-3735_jeopardy_classictvtitleroles_500': Extracted text split into 90 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/29438828__WQA__E-2010-9486__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33239993__WQ__E-2012-000940__EN_4b0892': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_74698': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/202191-4124_jeopardy_bornonthefourthofjuly_800': Extracted text split into 149 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/5712416__WQA__E-2003-0648__EN': Extracted text split into 14 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31685649__WQ__E-2011-008566__EN_0c33d8': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/28556714__WQ__P-2011-000290__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/188663-5199_jeopardy_allcroatiagreatsmall_800': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_67453': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/148277-3600_doublejeopardy_youbrokemyrecord_800': Extracted text split into 43 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/28349208__WQA__E-2010-9193__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/214336-3034_doublejeopardy_msplumlyscompositionclass_400': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_81826': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_22100': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/142815-4281_jeopardy_meetthebeatle_400': Extracted text split into 69 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_39239': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/078064-3234_jeopardy_dinosaurlore_400': Extracted text split into 86 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/136083-4187_jeopardy_thecivilwar_200': Extracted text split into 54 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_78307': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_46644': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6847978__TA__P5-TA-2003-0267__EN_a37d68': Extracted text split into 76 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/9045853__TA__P6-TA-2005-0206__EN_bff70b': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31296848__WQ__E-2011-007720__EN_5c876b': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_13479': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/2324777__WQ__P-2003-3125__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_5473': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13179792__WQA__P-2006-5648__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/029876-2931_doublejeopardy_uspresidents_600': Extracted text split into 122 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/33681138__WQ__E-2012-002459__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/173850-4760_jeopardy_meteorology_400': Extracted text split into 71 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/3582362__WQ__E-2003-3963__EN_f2a911': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29446893__IM-PRESS__20110314-IPR-15468__EN_50d6e8': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_25357': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_76204': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/191928-2815_jeopardy_anatomy_300': Extracted text split into 106 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/180214-4603_doublejeopardy_longshots_2000': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/8580062__WQA__P-2003-4074__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31430537__WQA__E-2011-006125__EN_75bfc9': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/024286-5120_jeopardy_gemstones_600': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/26399470__WQ__E-2010-4988__EN_afb5e1': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_62002': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25873036__WQA__E-2010-1158__EN': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23843939__WQ__E-2009-5620__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/168323-2150_doublejeopardy_1922_600': Extracted text split into 44 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/30750205__WQA__E-2011-003027__EN_ac1bbb': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/036716-2579_jeopardy_familiarphrases_500': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/172584-2999_doublejeopardy_goodideas_1000': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/076865-3681_doublejeopardy_seacreatures_800': Extracted text split into 138 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/083010-3639_doublejeopardy_thecinema_800': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/3579442__WQ__E-2004-0116__EN_b26638': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/071942-4890_doublejeopardy_smallmammals_800': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/068334-3785_jeopardy_usgeography_400': Extracted text split into 157 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_25844': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15473423__REPORT__A6-2007-0281__EN_e655a0': Extracted text split into 262 chunk(s).


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

  - Processing sample 'final_train/096421-3857_doublejeopardy_generalassembly_800': Extracted text split into 83 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/143365-2895_doublejeopardy_onewomanshows_600': Extracted text split into 55 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/102638-3096_jeopardy_pyramid_100': Extracted text split into 90 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/12763290__WQA__E-2006-4414__EN_cec696': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/186894-4996_jeopardy_propernames_1000': Extracted text split into 51 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/25172884__WQ__E-2010-2298__EN_460b8d': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8380637__WQ__P-2004-2955__EN_e790ed': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30697349__IM-PRESS__20110614-IPR-21331__EN': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/21608227__WQ__E-2009-1137__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16351988__WQA__E-2007-4049__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_23470': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16655412__IM-PRESS__20061113-IPR-12543__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_83990': Extracted text split into 37 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/158122-4633_doublejeopardy_reruns_1200': Extracted text split into 73 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/013219-3804_jeopardy_howtoproposemarriage_500': Extracted text split into 133 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/12870942__WQA__E-2006-4750__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_16352': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9653925__WQ__P-2005-2098__EN_ee9d0f': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6407387__MOTION__B5-2002-0112__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13928686__QT__H-2007-0318__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/33127635__WQA__P-2011-012154__EN_da030a': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20446770__WQA__E-2008-5215__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/181108-2901_doublejeopardy_potluck_600': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/23408424__IMP-CONTRIB__20091019-CAN-62686__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/23126794__MOTION__B7-2009-0071__EN': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/090178-5115_doublejeopardy_thedutchroyalfamily_4000': Extracted text split into 149 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/26464207__WQ__E-2010-5813__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11268730__WQA__P-2005-0653__EN_ce15dc': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26092428__WQ__E-2010-4211__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27980203__IMP-CONTRIB__20101207-CAN-07888__EN_1e29c1': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_22232': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/5685794__REPORT__A5-2004-0108__EN': Extracted text split into 126 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/144915-3707_doublejeopardy_historicheroes_600': Extracted text split into 50 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/27093363__WQA__E-2010-6274__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10913876__WQ__E-2005-0473__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24975404__WQ__E-2010-1706__EN_4a5fe7': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10627959__QT__H-2006-0244__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/20511030__WQ__E-2008-6179__EN_202b1e': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/012635-2341_doublejeopardy_famousamericans_200': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/046364-14_doublejeopardy_americanindians_400': Extracted text split into 54 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/16031291__TA__P6-TA-2007-0454__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_12699': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/122050-4488_jeopardy_fitsutoat_600': Extracted text split into 72 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_48674': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/093587-2947_doublejeopardy_businesspeople_400': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/173684-3797_jeopardy_womenauthors_300': Extracted text split into 74 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/28682900__TA__P7-TA-2011-0032__EN': Extracted text split into 64 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/23066540__IMP-CONTRIB__20090903-CAL-60177__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/108416-5160_doublejeopardy_agoodbook_800': Extracted text split into 92 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/8695667__QT__H-2005-0161__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/22173489__WQA__E-2009-1251__EN_6e0ade': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/168710-3418_jeopardy_anagrammedmusicals_200': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/6846213__TA__P5-TA-2003-0232__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11119971__TA__P6-TA-2006-0221__EN': Extracted text split into 168 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/15170930__WQ__P-2007-4082__EN_807034': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29562774__WQ__E-2011-002701__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/101834-3965_doublejeopardy_crosswordcluesf_400': Extracted text split into 88 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/11575239__WQA__E-2006-1953__EN_fe7148': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/015352-2886_jeopardy_authors_400': Extracted text split into 86 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/6440842__MOTION__B5-2000-0656__EN_d87a7b': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/16857999__IM-PRESS__20061121-STO-00121__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_37415': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/102916-2055_doublejeopardy_architecture_600': Extracted text split into 149 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/19986791__IMP-CONTRIB__20081009-CAN-39074__EN': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/13995125__WQA__E-2006-5310__EN_c07b1f': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_72890': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/078570-3128_doublejeopardy_collegesuniversities_800': Extracted text split into 143 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/31796041__OQ__O-2011-000228__EN_56d021': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/15851440__QT__H-2007-0743__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/25404630__WQA__E-2010-0761__EN': Extracted text split into 18 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/169468-2123_doublejeopardy_geography_800': Extracted text split into 62 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_7855': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12917927__WQA__E-2006-4724__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/074750-2682_jeopardy_literaryhodgepodge_300': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/190174__REPORT__A5-2002-0173__EN': Extracted text split into 147 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/19427975__WQA__E-2008-3497__EN_01b0d1': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/26159963__WQA__E-2010-2618__EN': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/29233623__WQ__E-2011-001510__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30717389__WQ__E-2011-005700__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19558419__WQ__P-2008-4672__EN_60a5c7': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/087580-3732_jeopardy_dormcuisine_400_57cf9d': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/5685597__OQ__O-2004-0018__EN_24f868': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/6418288__WQ__P-2004-1063__EN_341bfd': Extracted text split into 16 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/170686-3897_jeopardy_americanhistory_200': Extracted text split into 66 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/16031325__TA__P6-TA-2007-0460__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/17513027__WQ__E-2008-0570__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/32470553__WQ__E-2011-010719__EN_d1e250': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/8835612__QT__H-2005-0226__EN_761349': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/037782-2961_jeopardy_cingeography_100': Extracted text split into 60 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/20061174__WQ__E-2008-5434__EN_b2abb6': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12859765__WQ__E-2006-5391__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31673555__WQA__E-2011-006954__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/043749-3790_jeopardy_yo_100': Extracted text split into 55 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/30121206__WQ__E-2011-004048__EN': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/161131-1330_doublejeopardy_the18thcentury_1000': Extracted text split into 60 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/30322173__WQ__E-2011-004684__EN': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/27605789__WQ__E-2010-8725__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/035107-3771_doublejeopardy_thebible_200': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/202499-5204_doublejeopardy_geographicfilmtv_400': Extracted text split into 139 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11377549__WQA__E-2004-3118__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/100610-5106_doublejeopardy_onel_1200': Extracted text split into 144 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/160582-4210_doublejeopardy_maidsingreekmyth_2000': Extracted text split into 54 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_53828': Extracted text split into 25 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/10626734__WQ__P-2006-0053__EN': Extracted text split into 9 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_85133': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9301419__TA__P6-TA-2005-0321__EN': Extracted text split into 159 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/21112526__WQA__E-2008-6359__EN_b4bfd4': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/7359912__TA__P5-TA-1999-0057__EN': Extracted text split into 142 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/5729098__PRESS__DN-20040308-1__EN': Extracted text split into 35 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/007430-5094_jeopardy_beforetheywerefirstladies_1000': Extracted text split into 148 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/11404043__WQA__P-2004-2291__EN': Extracted text split into 15 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_13387': Extracted text split into 27 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_44532': Extracted text split into 19 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_84787': Extracted text split into 33 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/18258194__TA__P6-TA-2008-0161__EN': Extracted text split into 210 chunk(s).


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

  - Processing sample 'final_train/018632-4952_jeopardy_bangladeshslowly_600': Extracted text split into 154 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/089226-1486_doublejeopardy_famousquotes_600': Extracted text split into 47 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_29156': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/18019254__WQA__P-2008-0693__EN': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/204348-5121_jeopardy_historicalnovels_1000': Extracted text split into 157 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_30222': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/14855553__PV__20070712__EN': Extracted text split into 190 chunk(s).


Batches:   0%|          | 0/6 [00:00<?, ?it/s]

  - Processing sample 'final_train/24870335__REPORT__A7-2010-0032__EN_d43b8e': Extracted text split into 882 chunk(s).


Batches:   0%|          | 0/28 [00:00<?, ?it/s]

  - Processing sample 'final_train/26887797__MOTION__B7-2010-0526__EN_7c28ab': Extracted text split into 39 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/11162748__WQA__E-2005-1570__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/087773-2962_doublejeopardy_chemistry_200': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/150096-3150_doublejeopardy_classicforeignfilms_800': Extracted text split into 67 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/26172767__WQ__E-2010-4682__EN_72e7bc': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/401506__PRESS__TW-20030113-S__EN': Extracted text split into 476 chunk(s).


Batches:   0%|          | 0/15 [00:00<?, ?it/s]

  - Processing sample 'final_train/128239__QT__H-2002-0092__EN_af91c6': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/124442-3585_jeopardy_cornucopia_500': Extracted text split into 10 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/051831-3430_jeopardy_medalofhonorwinners_100': Extracted text split into 146 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/19366816__WQA__E-2008-2824__EN': Extracted text split into 1 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/097489-3649_jeopardy_biblicalpaintings_200': Extracted text split into 141 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/014321-3402_jeopardy_sitcomschange_100': Extracted text split into 145 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_23181': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/108102-4527_jeopardy_celebritybooks_600_5d7620': Extracted text split into 53 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/11173601__QT__H-2006-0473__EN_806aa9': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/11403091__WQA__E-2004-2283__EN_54987d': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/034279-3066_doublejeopardy_downunder_400_7dde9a': Extracted text split into 42 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/31142444__WQA__E-2011-005120__EN': Extracted text split into 13 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/30712342__WQA__E-2011-003466__EN': Extracted text split into 17 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/31904592__WQ__E-2011-009034__EN': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/076681-3296_doublejeopardy_alfredhitchcock_1400': Extracted text split into 2 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/111905-1331_jeopardy_fish_100': Extracted text split into 136 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_77714': Extracted text split into 29 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/199165-4853_jeopardy_familiarphrases_400': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/210006-4520_doublejeopardy_rabbi_2000': Extracted text split into 34 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/168777-3387_jeopardy_theyreoff_300': Extracted text split into 47 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/33363182__WQA__E-2011-012137__EN': Extracted text split into 11 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/201670-3208_doublejeopardy_nationalcoatsofarms_2000': Extracted text split into 150 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/027483-3434_doublejeopardy_mongolia_400_31b3a2': Extracted text split into 133 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/22846771__WQA__E-2009-3538__EN_9d4c75': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/19289521__IMP-CONTRIB__20080715-CAN-34218__EN': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/168839-4731_jeopardy_wintersportsreview_800': Extracted text split into 24 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/9170980__MOTION__B6-2005-0392__EN': Extracted text split into 45 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/18399450__QT__H-2008-0339__EN': Extracted text split into 5 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_45798': Extracted text split into 32 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_30237': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/166651-2357_jeopardy_nurseryrhymes_300': Extracted text split into 38 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/30134784__WQA__E-2011-002377__EN_abe0ef': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_14994': Extracted text split into 21 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/124446-3585_doublejeopardy_the8thcentury_200': Extracted text split into 68 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_18355': Extracted text split into 22 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24166585__WQ__P-2009-6760__EN': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/088330-5128_doublejeopardy_filmgods_1600': Extracted text split into 90 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/20091245__REPORT__A6-2008-0408__EN': Extracted text split into 278 chunk(s).


Batches:   0%|          | 0/9 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_65785': Extracted text split into 31 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/096686-3146_jeopardy_ontheauctionblock_1000': Extracted text split into 152 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/197449-3394_jeopardy_nationalanthems_500_1c2f20': Extracted text split into 65 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/085842-3467_jeopardy_anagrammedmusicals_200': Extracted text split into 77 chunk(s).


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

  - Processing sample 'final_train/21428652__WQ__E-2009-0818__EN_1d1320': Extracted text split into 12 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_73822': Extracted text split into 23 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/12917799__WQA__E-2006-4605__EN_e722ae': Extracted text split into 7 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/044765-4217_jeopardy_thenba_1000': Extracted text split into 52 chunk(s).


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

  - Processing sample 'final_train/7186882__TA__20001213__EN_d9c148': Extracted text split into 4 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/059304-3616_doublejeopardy_famousfemales_800_9680db': Extracted text split into 126 chunk(s).


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

  - Processing sample 'final_train/22473286__WQ__E-2009-2959__EN_1dda56': Extracted text split into 6 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/context_14635': Extracted text split into 28 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/24038469__WQ__E-2009-6128__EN_b8bb3a': Extracted text split into 8 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

  - Processing sample 'final_train/085563-4237_doublejeopardy_foreignfilms_400_07ff39': Extracted text split into 137 chunk(s).


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

  - Processing sample 'final_train/7394975__WQ__P-1999-1024__EN_484ca5': Extracted text split into 3 chunk(s).


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [15]:
print(f"\n--- Processing Complete ---")
print(f"Total number of chunks embedded: {len(embedded_chunks)}")

# Print details of the first 3 generated chunks for verification
print("\n--- Sample of Embedded Chunks ---")
for i, item in enumerate(embedded_chunks[:3]):
    print(f"\n[Chunk {i+1}]")
    print(f"  Original Key: {item['original_key']}")
    print(f"  Chunk Index:  {item['chunk_index']}")
    print(f"  Chunk Text:   '{item['chunk_text'][:100]}...'") # Preview of the text
    print(f"  Embedding Shape: {item['embedding'].shape}")


--- Processing Complete ---
Total number of chunks embedded: 2376512

--- Sample of Embedded Chunks ---

[Chunk 1]
  Original Key: final_train/21972291__WQ__P-2009-2118__EN
  Chunk Index:  0
  Chunk Text:   'WRITTEN QUESTION P-2118/09
by Witold Tomczak (IND/DEM)
to the Commission
(23 March 2009)
Subject: Pr...'
  Embedding Shape: (384,)

[Chunk 2]
  Original Key: final_train/21972291__WQ__P-2009-2118__EN
  Chunk Index:  1
  Chunk Text:   'In the above proposal, the Commission proposes inter alia a ban on the marketing of products from al...'
  Embedding Shape: (384,)

[Chunk 3]
  Original Key: final_train/21972291__WQ__P-2009-2118__EN
  Chunk Index:  2
  Chunk Text:   'of seals. In many cases, seals are pests posing a threat to fish stocks and their numbers needs to b...'
  Embedding Shape: (384,)


In [19]:
import pandas as pd
import pickle

In [20]:
print("\n--- Viewing and Saving Embeddings ---")

# --- Method 1: Viewing with Pandas DataFrame ---
# This is a great way to inspect your data in a structured table.
print("\nCreating Pandas DataFrame for easy viewing...")
df = pd.DataFrame(embedded_chunks)
# Display the first 5 rows of the dataframe.
# The 'embedding' column will show the numpy array.
print(df.head())



--- Viewing and Saving Embeddings ---

Creating Pandas DataFrame for easy viewing...
                                original_key  chunk_index  \
0  final_train/21972291__WQ__P-2009-2118__EN            0   
1  final_train/21972291__WQ__P-2009-2118__EN            1   
2  final_train/21972291__WQ__P-2009-2118__EN            2   
3  final_train/21972291__WQ__P-2009-2118__EN            3   
4  final_train/21972291__WQ__P-2009-2118__EN            4   

                                          chunk_text  \
0  WRITTEN QUESTION P-2118/09\nby Witold Tomczak ...   
1  In the above proposal, the Commission proposes...   
2  of seals. In many cases, seals are pests posin...   
3  hunts, which are subject to no controls whatso...   
4  poachers and might result in great suffering t...   

                                           embedding  
0  [-0.09138011, 0.030646732, -0.0027935628, -0.0...  
1  [-0.045915566, 0.03897162, 0.006841461, -0.047...  
2  [0.060897853, 0.07951124, 0.03683788, 0.03

In [21]:
# --- Method 2: Saving to Parquet (Recommended) ---
# Parquet is an efficient, column-oriented data format ideal for large datasets.
parquet_path = "/kaggle/working/embeddings.parquet"
print(f"\nSaving DataFrame to Parquet file: {parquet_path}")
df.to_parquet(parquet_path, index=False)
print("Saved successfully.")

# How to load it back:
# print("\nLoading data from Parquet file...")
# loaded_df = pd.read_parquet(parquet_path)
# print(loaded_df.head())


# --- Method 3: Saving to a Pickle file (Simple Alternative) ---
# Pickle is a Python-specific format that serializes the entire list object.
pickle_path = "/kaggle/working/embeddings.pkl"
print(f"\nSaving list to Pickle file: {pickle_path}")
with open(pickle_path, "wb") as f:
    pickle.dump(embedded_chunks, f)
print("Saved successfully.")

# How to load it back:
# print("\nLoading data from Pickle file...")
# with open(pickle_path, "rb") as f:
#     loaded_chunks = pickle.load(f)
# print(f"Loaded {len(loaded_chunks)} chunks from pickle.")


Saving DataFrame to Parquet file: /kaggle/working/embeddings.parquet
Saved successfully.

Saving list to Pickle file: /kaggle/working/embeddings.pkl
Saved successfully.


In [22]:
# How to load it back:
print("\nLoading data from Pickle file...")
with open('/kaggle/working/embeddings.pkl', "rb") as f:
    loaded_chunks = pickle.load(f)
print(f"Loaded {len(loaded_chunks)} chunks from pickle.")


Loading data from Pickle file...
Loaded 2376512 chunks from pickle.
