# Entrenamiento con Simple Transformers

## Paso 1: Cargar datos en Google Colab

## Paso 2: Instalar librerías

In [1]:
!pip install transformers evaluate torch --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
!pip install simpletransformers transformers datasets huggingface_hub scikit-learn

Collecting simpletransformers
  Downloading simpletransformers-0.70.5-py3-none-any.whl.metadata (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.3/43.3 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorboardx (from simpletransformers)
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Collecting streamlit (from simpletransformers)
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit->simpletransformers)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading simpletransformers-

## Paso 3: Cargar librerías

In [4]:
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
import evaluate
import json
from tqdm import tqdm
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs
from sklearn.model_selection import train_test_split

In [5]:
import logging
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

In [6]:
import torch
print("GPU available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))

GPU available: True
GPU name: Tesla T4


# Paso 4: Cargar datos

In [11]:
import json

with open("/content/qa_dataset_col_mex_news_squad2.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

print(f"✅ Loaded {len(dataset)} records successfully!")


✅ Loaded 1 records successfully!


In [12]:
dataset = dataset["data"]

In [13]:
# 🔧 1️⃣ Flatten your dataset so each row has 'context' and 'qas'
def flatten_squad(dataset):
    new_data = []
    for article in dataset:
        for para in article["paragraphs"]:
            new_data.append({
                "context": para["context"],
                "qas": para["qas"]
            })
    return new_data

flattened_data = flatten_squad(dataset)

# 🔧 2️⃣ Split into train and eval sets (70/30)
train_data, eval_data = train_test_split(flattened_data, test_size=0.3, random_state=42)

In [14]:
print(f"✅ Training samples: {len(train_data)}")
print(f"✅ Eval samples: {len(eval_data)}")

✅ Training samples: 2273
✅ Eval samples: 975


# Paso 5: Definir hiperparámetros

In [15]:
model_args = QuestionAnsweringArgs()

# Training behavior
model_args.train_batch_size = 8
model_args.eval_batch_size = 8
model_args.num_train_epochs = 4
model_args.learning_rate = 2e-5
model_args.gradient_accumulation_steps = 1
model_args.overwrite_output_dir = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_steps = 500
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = False
model_args.save_steps = -1
model_args.best_model_dir = "./outputs/best_model/"
model_args.output_dir = "./outputs/"

# Optimization
model_args.max_seq_length = 384
model_args.doc_stride = 128
model_args.warmup_ratio = 0.1
model_args.max_answer_length = 30

# Logging
model_args.logging_steps = 100
model_args.evaluate_during_training_verbose = True
model_args.manual_seed = 42

# Resource handling
model_args.use_multiprocessing = False  # safer for notebooks
model_args.fp16 = torch.cuda.is_available()  # use mixed precision if CUDA available

# Paso 6: Cargar el modelo

In [16]:
model = QuestionAnsweringModel(
    model_type="bert",
    model_name="mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",  # BETO distilled
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

The following layers were not sharded: bert.encoder.layer.*.output.dense.weight, qa_outputs.bias, qa_outputs.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.embeddings.token_type_embeddings.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.intermediate.dense.bias, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.value.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.self.query.weight, bert.encoder.layer.*.attention.self.key.weight, bert.encode

tokenizer_config.json:   0%|          | 0.00/135 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

# Paso 7: Entrenamiento del modelo

In [17]:
model.train_model(train_data, eval_data=eval_data)

100%|██████████| 14684/14684 [06:39<00:00, 36.76it/s]
add example index and unique id: 100%|██████████| 14684/14684 [00:00<00:00, 735858.63it/s]


Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 4:   0%|          | 0/8646 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:19<34:17:08, 19.65s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:20<02:39, 36.18it/s] [A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:25<01:30, 57.54it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:33<01:16, 61.29it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:39<00:25, 124.92it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:23, 110.16it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 123.93it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 468469.84it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:19<33:51:41, 19.41s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:26<03:42, 25.94it/s] [A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:26<01:25, 60.94it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:32<01:08, 68.79it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:16, 193.63it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:24, 106.08it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:52<00:00, 120.53it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 482513.56it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:25<44:12:07, 25.33s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:31<01:12, 64.70it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:37<00:26, 116.41it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:42<00:22, 114.11it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.18it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 456182.03it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:30<52:36:59, 30.16s/it][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:30<00:43, 96.50it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:25, 125.14it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:27, 94.52it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:52<00:00, 118.99it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 461107.73it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:28<49:55:33, 28.62s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:40<00:31, 99.57it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:27, 94.75it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 129.66it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 456055.69it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:23<41:17:52, 23.67s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:25<00:54, 85.94it/s][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:30<00:46, 90.03it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:26, 119.77it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:42<00:24, 108.16it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.33it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 459908.50it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:23<41:08:50, 23.58s/it][A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:25<01:29, 58.35it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:30<01:06, 70.60it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:37<00:25, 123.33it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:27, 95.01it/s] [A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:47<00:10, 154.93it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 128.08it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 444582.35it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:29<50:39:34, 29.04s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:27, 114.08it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:29, 89.83it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:47<00:00, 131.64it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 473249.12it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:17<31:21:14, 17.97s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:23<03:17, 29.18it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:30<01:04, 73.38it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:22, 137.71it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:48<00:29, 89.21it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.50it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 464023.00it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:29<51:27:12, 29.49s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:24, 129.38it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:43<00:26, 98.60it/s] [A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:48<00:13, 120.56it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 125.08it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 453613.91it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:26<45:44:23, 26.22s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:26<00:57, 82.40it/s][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:27<00:36, 113.52it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:39<00:30, 102.64it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:25, 102.63it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:46<00:10, 155.15it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 125.23it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 456854.35it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:26<46:53:53, 26.88s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:27<03:35, 26.70it/s] [A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:28<00:31, 133.74it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:22, 142.74it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:27, 93.92it/s] [A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:47<00:10, 153.73it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:52<00:00, 120.07it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 464268.28it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:25<43:56:50, 25.19s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:27<03:37, 26.42it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:32<01:02, 75.31it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:32<00:16, 186.88it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:42<00:16, 186.88it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:25, 104.09it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:45<00:09, 165.83it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.36it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 451043.66it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<47:47:49, 27.40s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:28<01:02, 75.66it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:24, 127.44it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:46<00:24, 127.44it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:28, 91.26it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 128.15it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 423332.17it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<48:46:34, 27.96s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:27, 115.79it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:46<00:27, 115.79it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:30, 86.78it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 128.31it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 401019.99it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:20<35:07:24, 20.13s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:30<04:25, 21.65it/s] [A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:34<00:22, 141.64it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:26, 100.75it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:46<00:10, 149.58it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 124.93it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 413415.41it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:28<49:09:53, 28.18s/it][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:30<00:44, 94.02it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:26, 120.84it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:27, 96.73it/s] [A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:46<00:10, 143.39it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 127.79it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 407242.93it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/6282 [00:28<49:26:19, 28.34s/it][A
convert squad examples to features:  25%|██▍       | 1570/6282 [00:34<01:19, 59.45it/s][A
convert squad examples to features:  58%|█████▊    | 3662/6282 [00:43<00:22, 116.72it/s][A
convert squad examples to features:  75%|███████▍  | 4708/6282 [00:48<00:11, 137.09it/s][A
convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 128.44it/s]

add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 450812.15it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/8646 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<47:38:25, 27.31s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:28, 111.69it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:30, 86.96it/s] [A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:30, 86.96it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 121.86it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 460849.65it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<48:21:11, 27.71s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:28<01:00, 77.80it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:38<00:28, 108.57it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:27, 96.68it/s] [A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:49<00:11, 132.89it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 125.41it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 441987.08it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:21<37:23:06, 21.43s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:26<03:39, 26.24it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:34<01:14, 63.29it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:37<00:22, 138.98it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:43<00:21, 121.38it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.38it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 426684.44it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:26<45:58:24, 26.35s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:30<01:08, 68.94it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:24, 130.25it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:49<00:31, 83.16it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 125.45it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 419757.81it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:21<37:36:02, 21.55s/it][A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:24<01:32, 56.68it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:35<01:27, 53.90it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:40<00:27, 112.33it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:48<00:26, 97.71it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:53<00:00, 117.17it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 422585.33it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:30<52:52:56, 30.31s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:37<00:28, 110.12it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:28, 91.83it/s] [A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:48<00:11, 134.50it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:52<00:00, 120.38it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 414045.57it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:26<47:00:47, 26.95s/it][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:30<00:45, 92.51it/s][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:42<00:45, 92.51it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:42<00:34, 91.99it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:49<00:30, 87.20it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 124.45it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 451306.33it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:24<42:27:38, 24.34s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:28<03:50, 24.96it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:33<01:05, 71.47it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:39<00:24, 129.42it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:25, 103.92it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:49<00:10, 151.14it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 123.02it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 409534.30it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:23<40:27:50, 23.19s/it][A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:27<01:40, 51.97it/s][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:30<00:41, 100.38it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:41<00:31, 100.36it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:43<00:22, 114.90it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:48<00:11, 142.61it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 127.96it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 411106.19it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:29<50:54:02, 29.17s/it][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:31<00:44, 93.53it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:22, 139.36it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:42<00:25, 104.08it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 121.68it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 442692.55it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:28<50:03:32, 28.69s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:30<03:55, 24.47it/s] [A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:31<00:17, 179.61it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:44<00:17, 179.61it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 125.37it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 437713.76it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<48:42:20, 27.92s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:28<01:01, 76.61it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:26, 120.54it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:26, 98.57it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.30it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 429732.49it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:17<31:15:24, 17.92s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:26<03:54, 24.51it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:31<01:04, 72.60it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:21, 143.96it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:43<00:23, 110.31it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:49<00:11, 131.34it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 125.72it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 421557.65it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:24<42:05:17, 24.12s/it][A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:25<01:32, 56.67it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:29<01:01, 76.02it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:24, 130.77it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:24, 106.28it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 129.14it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 413162.59it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:26<46:44:30, 26.79s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:29<01:05, 72.27it/s][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:31<00:42, 97.65it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:38<00:26, 116.58it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:25, 103.27it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 124.18it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 416704.11it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:23<40:32:14, 23.23s/it][A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:26<01:38, 52.88it/s][A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:40<01:38, 52.88it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:43<00:33, 93.16it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:27, 96.87it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 123.39it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 417403.85it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:28<48:56:44, 28.05s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:29<03:50, 24.96it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:35<01:08, 68.42it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:18, 170.85it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:24, 104.83it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:47<00:09, 167.69it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 124.11it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 437648.33it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/6282 [00:24<42:46:59, 24.52s/it][A
convert squad examples to features:   8%|▊         | 524/6282 [00:26<03:28, 27.64it/s] [A
convert squad examples to features:  25%|██▍       | 1570/6282 [00:27<00:47, 99.95it/s][A
convert squad examples to features:  33%|███▎      | 2093/6282 [00:29<00:33, 126.79it/s][A
convert squad examples to features:  50%|████▉     | 3139/6282 [00:37<00:24, 128.74it/s][A
convert squad examples to features:  58%|█████▊    | 3662/6282 [00:42<00:21, 122.41it/s][A
convert squad examples to features:  67%|██████▋   | 4185/6282 [00:43<00:13, 152.83it/s][A
convert squad examples to features:  75%|███████▍  | 4708/6282 [00:49<00:12, 123.94it/s][A
convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 123.12it/s]

add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 425554.26it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/8646 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:19<33:47:47, 19.37s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:21<02:55, 32.78it/s] [A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:26<01:33, 55.70it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:31<01:05, 71.50it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:34<00:19, 161.60it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:45<00:19, 161.60it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 129.71it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 436835.68it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:22<38:49:02, 22.25s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:30<04:22, 21.92it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:32<01:02, 75.33it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:16, 185.86it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:41<00:20, 126.92it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:47<00:10, 144.72it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.68it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 446601.88it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<47:58:20, 27.50s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:32<01:13, 64.20it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:23, 132.27it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:25, 103.55it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 128.01it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 396738.86it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:17<30:16:39, 17.35s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:21<03:02, 31.55it/s] [A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:27<01:42, 51.12it/s][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:30<00:37, 111.49it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:19, 164.99it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:28, 91.12it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 124.71it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 448060.02it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<48:30:38, 27.80s/it][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:28<00:41, 102.06it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:38<00:29, 105.46it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:28, 92.29it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 129.04it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 439495.22it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:26<46:20:53, 26.56s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:39<00:32, 98.09it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:28, 91.52it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:47<00:00, 131.69it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 436394.34it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:20<36:22:08, 20.85s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:27<03:50, 24.99it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:30<01:00, 77.27it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:21, 148.65it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:25, 102.32it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:48<00:11, 140.11it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 125.54it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 429872.71it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<47:08:53, 27.02s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:27<03:34, 26.86it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:31<00:59, 79.21it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:17, 184.31it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:24, 108.80it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 129.49it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 405525.56it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:19<34:15:46, 19.64s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:21<02:57, 32.52it/s] [A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:23<01:16, 68.88it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:34<01:20, 58.27it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:37<00:21, 143.55it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:42<00:20, 130.72it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:47<00:10, 150.36it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 127.09it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 466421.51it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:26<46:15:17, 26.51s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:34<01:19, 58.91it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:22, 139.32it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:28, 91.20it/s] [A[A

convert squad examples to features:  83%|████████▎ | 5231/6282 [00:48<00:06, 172.84it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 123.99it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 419764.50it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:28<50:03:55, 28.70s/it][A[A

convert squad examples to features:   0%|          | 2/6282 [00:28<20:47:52, 11.92s/it][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:29<00:29, 140.52it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:39<00:25, 122.97it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:42<00:19, 132.33it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:49<00:11, 139.63it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 123.45it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 423223.38it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:29<52:15:34, 29.95s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:34<00:25, 125.37it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:43<00:25, 100.90it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 121.87it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 426898.75it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:26<46:08:21, 26.45s/it][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:27<00:39, 105.27it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:27, 114.38it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:30, 86.21it/s] [A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:48<00:11, 132.29it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 128.59it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 431378.81it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<47:18:34, 27.12s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:33<01:14, 63.12it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:43<00:23, 112.45it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 125.75it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 385868.11it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<47:20:37, 27.14s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:31<01:10, 67.13it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:34<00:22, 138.05it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:27, 94.17it/s] [A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:27, 94.17it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:46<00:10, 149.24it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:47<00:00, 131.51it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 444087.81it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:28<49:13:05, 28.21s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:27, 115.89it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:29, 89.09it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 122.16it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 407614.64it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:18<33:03:51, 18.95s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:26<03:53, 24.68it/s] [A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:27<01:31, 56.95it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:28<00:49, 94.99it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:34<00:18, 167.20it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:23, 109.53it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 128.41it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 456253.12it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/6282 [00:20<35:43:31, 20.48s/it][A
convert squad examples to features:   8%|▊         | 524/6282 [00:26<03:47, 25.34it/s] [A
convert squad examples to features:  17%|█▋        | 1047/6282 [00:27<01:28, 59.12it/s][A
convert squad examples to features:  50%|████▉     | 3139/6282 [00:39<00:26, 118.77it/s][A
convert squad examples to features:  50%|████▉     | 3139/6282 [00:39<00:26, 118.77it/s][A
convert squad examples to features:  58%|█████▊    | 3662/6282 [00:49<00:27, 94.80it/s] [A
convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 125.88it/s]

add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 430371.23it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]

Running Epoch 4 of 4:   0%|          | 0/8646 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:28<48:51:43, 28.01s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:43<00:34, 89.93it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:49<00:29, 88.36it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:52<00:00, 119.97it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 428446.74it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:18<31:38:56, 18.14s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:20<02:47, 34.44it/s] [A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:22<01:11, 73.58it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:32<01:15, 62.03it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:19, 162.37it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:43<00:23, 111.65it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:47<00:11, 139.35it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:52<00:00, 118.58it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 419003.53it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:29<51:05:36, 29.28s/it][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:30<00:44, 94.82it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:26, 119.04it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:26, 97.80it/s] [A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:49<00:12, 128.95it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.11it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 420709.54it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:20<36:35:11, 20.97s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:29<04:19, 22.16it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:33<01:08, 69.01it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:34<00:18, 172.50it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:45<00:18, 172.50it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:47<00:26, 99.82it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.51it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 288764.63it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:21<37:01:17, 21.22s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:25<03:25, 28.02it/s] [A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:27<01:32, 56.88it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:31<01:00, 78.01it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:40<00:26, 119.64it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:20, 128.43it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:46<00:08, 179.61it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 121.59it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 425204.02it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:22<39:40:34, 22.74s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:27<03:45, 25.54it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:31<01:01, 77.04it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:21, 145.95it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:23, 109.41it/s][A[A

convert squad examples to features:  75%|███████▍  | 4708/6282 [00:48<00:10, 146.20it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.50it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 285854.27it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:29<50:57:22, 29.21s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:40<00:31, 98.81it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:26, 97.06it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 124.77it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 431612.00it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:29<52:09:54, 29.90s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:24, 129.50it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:43<00:24, 129.50it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:27, 97.00it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 121.03it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 454772.65it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:21<37:16:45, 21.37s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:25<03:27, 27.70it/s] [A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:26<01:26, 60.24it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:33<01:08, 68.92it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:21, 124.30it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 122.79it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 424840.66it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:25<45:04:09, 25.83s/it][A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:27<01:40, 51.94it/s][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:29<00:37, 110.47it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:38<00:27, 115.98it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.81it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 408012.29it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:28<49:51:31, 28.58s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:29<01:02, 75.75it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:24, 128.30it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:26, 100.14it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:52<00:00, 120.35it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 416144.70it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:20<35:34:28, 20.39s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:27<03:51, 24.85it/s] [A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:28<00:54, 86.95it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:23, 136.24it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:42<00:22, 116.25it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 126.04it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 401074.93it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:29<51:12:19, 29.35s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:29<01:03, 74.52it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:21, 146.48it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:43<00:21, 146.48it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:27, 95.58it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 129.47it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 451321.80it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<48:17:10, 27.68s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:28<01:00, 77.26it/s][A[A

convert squad examples to features:  33%|███▎      | 2093/6282 [00:29<00:39, 105.29it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:25, 125.43it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:28, 91.81it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 129.38it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 397199.37it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:27<48:42:57, 27.92s/it][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:31<01:09, 68.03it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:36<00:24, 128.84it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:25, 103.09it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:48<00:00, 129.04it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 323868.16it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:20<36:14:25, 20.77s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:29<04:14, 22.62it/s] [A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:38<00:26, 120.07it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:24, 109.11it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:49<00:00, 127.21it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 366732.33it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:28<50:20:36, 28.85s/it][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:26, 117.26it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:45<00:28, 93.41it/s] [A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 123.45it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 394269.22it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6282 [00:23<40:53:08, 23.43s/it][A[A

convert squad examples to features:   8%|▊         | 524/6282 [00:24<03:12, 29.93it/s] [A[A

convert squad examples to features:   9%|▉         | 572/6282 [00:24<02:52, 33.17it/s][A[A

convert squad examples to features:  17%|█▋        | 1047/6282 [00:25<01:05, 79.74it/s][A[A

convert squad examples to features:  25%|██▍       | 1570/6282 [00:32<00:58, 80.30it/s][A[A

convert squad examples to features:  50%|████▉     | 3139/6282 [00:35<00:17, 180.32it/s][A[A

convert squad examples to features:  58%|█████▊    | 3662/6282 [00:44<00:22, 118.74it/s][A[A

convert squad examples to features: 100%|██████████| 6282/6282 [00:50<00:00, 124.09it/s]


add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 441483.49it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/6282 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/6282 [00:23<41:20:07, 23.69s/it][A
convert squad examples to features:   0%|          | 2/6282 [00:23<17:16:10,  9.90s/it][A
convert squad examples to features:   8%|▊         | 524/6282 [00:29<03:05, 30.96it/s] [A
convert squad examples to features:  25%|██▍       | 1570/6282 [00:32<00:49, 95.16it/s][A
convert squad examples to features:  50%|████▉     | 3139/6282 [00:33<00:15, 208.17it/s][A
convert squad examples to features:  58%|█████▊    | 3662/6282 [00:46<00:23, 109.29it/s][A
convert squad examples to features:  75%|███████▍  | 4708/6282 [00:50<00:11, 142.91it/s][A
convert squad examples to features: 100%|██████████| 6282/6282 [00:51<00:00, 122.51it/s]

add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 376591.74it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]

(34584,
 {'global_step': [500,
   1000,
   1500,
   2000,
   2500,
   3000,
   3500,
   4000,
   4500,
   5000,
   5500,
   6000,
   6500,
   7000,
   7500,
   8000,
   8500,
   8646,
   9000,
   9500,
   10000,
   10500,
   11000,
   11500,
   12000,
   12500,
   13000,
   13500,
   14000,
   14500,
   15000,
   15500,
   16000,
   16500,
   17000,
   17292,
   17500,
   18000,
   18500,
   19000,
   19500,
   20000,
   20500,
   21000,
   21500,
   22000,
   22500,
   23000,
   23500,
   24000,
   24500,
   25000,
   25500,
   25938,
   26000,
   26500,
   27000,
   27500,
   28000,
   28500,
   29000,
   29500,
   30000,
   30500,
   31000,
   31500,
   32000,
   32500,
   33000,
   33500,
   34000,
   34500,
   34584],
  'correct': [3416,
   3669,
   3949,
   4009,
   4051,
   4206,
   4099,
   4152,
   4237,
   4239,
   4213,
   4257,
   4269,
   4215,
   4214,
   4143,
   4154,
   4278,
   4219,
   4257,
   4298,
   4270,
   4177,
   4192,
   4182,
   4210,
   4294,
   4268,
   4

# Paso 8: Evaluación de los resultados

In [24]:
result, texts = model.eval_model(eval_data)
print("📊 Evaluation results:")
print(result)

convert squad examples to features: 100%|██████████| 6282/6282 [00:55<00:00, 113.84it/s]
add example index and unique id: 100%|██████████| 6282/6282 [00:00<00:00, 434186.66it/s]


Running Evaluation:   0%|          | 0/3883 [00:00<?, ?it/s]

📊 Evaluation results:
{'correct': 4344, 'similar': 1605, 'incorrect': 333, 'eval_loss': -9.376533125160957}


In [106]:
#eval_data[0]

# Paso 7: Guardar los resultados

In [97]:
import os

# Folder to save
local_path = "./QA_model"
os.makedirs(local_path, exist_ok=True)

# Save the Hugging Face model & tokenizer directly
model.model.save_pretrained(local_path)       # Saves weights + config
model.tokenizer.save_pretrained(local_path)   # Saves vocab + tokenizer config

# Check files
!ls -l ./QA_model


total 427080
-rw-r--r-- 1 root root       615 Oct 25 14:27 config.json
-rw-r--r-- 1 root root 437070648 Oct 25 14:27 model.safetensors
-rw-r--r-- 1 root root       125 Oct 25 14:27 special_tokens_map.json
-rw-r--r-- 1 root root      1293 Oct 25 14:27 tokenizer_config.json
-rw-r--r-- 1 root root    242349 Oct 25 14:27 vocab.txt


In [98]:
import shutil

shutil.make_archive("QA_model", 'zip', local_path)
print("✅ Zipped model")
!ls -lh QA_model.zip

✅ Zipped model
-rw-r--r-- 1 root root 387M Oct 25 14:28 QA_model.zip


In [99]:
from google.colab import files
files.download("QA_model.zip")  # download to your computer

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [100]:
# Or move to Drive
import shutil
shutil.move("QA_model.zip", "/content/drive/MyDrive/Thesis_QA_Optimization/Model")


'/content/drive/MyDrive/Thesis_QA_Optimization/Model/QA_model.zip'

In [101]:
from simpletransformers.question_answering import QuestionAnsweringModel

# Path to the folder containing the saved model
model_path = "./QA_model"  # change if different

# Reload the model
my_model = QuestionAnsweringModel(
    "bert",
    model_path,
    use_cuda=True  # set to False if no GPU
)

The following layers were not sharded: bert.encoder.layer.*.output.dense.weight, qa_outputs.bias, qa_outputs.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.embeddings.token_type_embeddings.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.intermediate.dense.bias, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.value.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.self.query.weight, bert.encoder.layer.*.attention.self.key.weight, bert.encode

In [104]:
# Context & question
context = "Ciudad de México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez, fue hallado muerto en Tamaulipas."
question = "¿Quién fue hallado muerto en Tamaulipas?"

# Prepare input in SimpleTransformers format
to_predict = [
    {
        "context": context,
        "qas": [
            {
                "id": "0",
                "question": question,
                "answers": [{"text": " ", "answer_start": 0}],
                "is_impossible": False
            }
        ]
    }
]

# Run prediction
answers = my_model.predict(to_predict)
print(answers)

convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 227.31it/s]
add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 12052.60it/s]


Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

([{'id': '0', 'answer': ['Abraham Jeremías Pérez Ramírez,', 'Abraham', 'El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Ramírez', 'capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez', 'Abraham Jeremías Pérez Ramírez, fue hallado muerto', 'Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado muerto en Tamaulipas.', 'Pérez Ramírez', 'Ciudad de México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías', 'Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado muerto en', 'Abraham Jeremías Pérez Ramírez, fue', 'México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado', '', 'El capitán de la Secretaría de Marina, Abraham']}], [{'id': '0', 'probability': [0.9999867335032386, 3.3964265149031443e-06, 3.065420614261552e-06, 2.982736123103299e-06, 6.05979598924565e-07, 5.

  with amp.autocast():


In [105]:
# Run prediction
answers = model.predict(to_predict)
print(answers)

convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 228.82it/s]
add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 8756.38it/s]


Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

([{'id': '0', 'answer': ['Abraham Jeremías Pérez Ramírez,', 'Abraham', 'El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Ramírez', 'capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez', 'Abraham Jeremías Pérez Ramírez, fue hallado muerto', 'Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado muerto en Tamaulipas.', 'Pérez Ramírez', 'Ciudad de México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías', 'Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado muerto en', 'Abraham Jeremías Pérez Ramírez, fue', 'México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado', '', 'El capitán de la Secretaría de Marina, Abraham']}], [{'id': '0', 'probability': [0.9999867373084392, 3.393111324061626e-06, 3.065420625926247e-06, 2.9827361344533594e-06, 6.036171106566265e-07, 5

# Evaluation

In [9]:
#model_name = "mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
model_name = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

qa_pipeline = pipeline(
    "question-answering",
    model=model_name,
    tokenizer=model_name
)

config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

Some weights of the model checkpoint at mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/135 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cuda:0


In [56]:
contexto = """
Gabriel García Márquez fue un novelista colombiano, ganador del Premio Nobel de Literatura en 1982.
Es conocido por obras como 'Cien años de soledad' y 'El amor en los tiempos del cólera'.
"""

pregunta = "¿Quién ganó el Premio Nobel de Literatura en 1982?"

resultado = qa_pipeline({
    "context": contexto,
    "question": pregunta
})

print(resultado)

NameError: name 'qa_pipeline' is not defined

In [None]:
from tqdm import tqdm

predictions = []
references = []

for article in tqdm(dataset["data"]):
    for paragraph in article["paragraphs"]:
        context = paragraph["context"]
        for qa in paragraph["qas"]:
            question = qa["question"]

            # Model prediction
            pred = qa_pipeline(question=question, context=context)

            predictions.append({
                "id": qa["id"],
                "prediction_text": pred.get("answer", ""),
                "no_answer_probability": 1 - pred.get("score", 0)
            })

            # Handle multiple gold answers if present
            gold_texts = [ans["text"] for ans in qa["answers"]]
            gold_starts = [ans.get("answer_start", 0) for ans in qa["answers"]]

            # ✅ FIX: include both "text" and "answer_start"
            references.append({
                "id": qa["id"],
                "answers": {
                    "text": gold_texts,
                    "answer_start": gold_starts
                }
            })



  0%|          | 0/3248 [00:00<?, ?it/s][A
  0%|          | 1/3248 [00:00<31:07,  1.74it/s][A
  0%|          | 2/3248 [00:00<17:10,  3.15it/s][A
  0%|          | 3/3248 [00:01<17:16,  3.13it/s][A
  0%|          | 4/3248 [00:01<19:26,  2.78it/s][A
  0%|          | 5/3248 [00:01<18:22,  2.94it/s][A
  0%|          | 6/3248 [00:02<19:48,  2.73it/s][A
  0%|          | 8/3248 [00:02<14:15,  3.79it/s][A
  0%|          | 9/3248 [00:02<12:50,  4.21it/s][A
  0%|          | 10/3248 [00:03<16:36,  3.25it/s][A
  0%|          | 11/3248 [00:03<14:40,  3.68it/s][A
  0%|          | 12/3248 [00:03<13:31,  3.99it/s][A
  0%|          | 13/3248 [00:03<12:43,  4.24it/s][A
  0%|          | 14/3248 [00:03<12:50,  4.20it/s][A
  0%|          | 15/3248 [00:04<16:43,  3.22it/s][A
  0%|          | 16/3248 [00:04<14:31,  3.71it/s][A
  1%|          | 17/3248 [00:05<17:50,  3.02it/s][A
  1%|          | 18/3248 [00:05<25:12,  2.14it/s][A
  1%|          | 19/3248 [00:06<20:23,  2.64it/s][A
  1%|    

In [None]:
metric = evaluate.load("squad_v2")

In [None]:
references

[{'id': 'qa-17811', 'answers': {'text': ['Illinois'], 'answer_start': [153]}},
 {'id': 'qa-17812',
  'answers': {'text': ['Tuxedomoon'], 'answer_start': [278]}},
 {'id': 'qa-17813',
  'answers': {'text': ['Qué Viva México'], 'answer_start': [2502]}},
 {'id': 'qa-17814', 'answers': {'text': ['1993'], 'answer_start': [626]}},
 {'id': 'qa-17815',
  'answers': {'text': ['fiesta de la Candelaria'], 'answer_start': [2698]}},
 {'id': 'qa-17816', 'answers': {'text': [], 'answer_start': []}},
 {'id': 'qa-17817', 'answers': {'text': [], 'answer_start': []}},
 {'id': 'qa-17778',
  'answers': {'text': ['La funcionaria'], 'answer_start': [2]}},
 {'id': 'qa-17779',
  'answers': {'text': ['19 por ciento'], 'answer_start': [478]}},
 {'id': 'qa-17781',
  'answers': {'text': ['73.7 por ciento'], 'answer_start': [1730]}},
 {'id': 'qa-17782', 'answers': {'text': [], 'answer_start': []}},
 {'id': 'qa-17938',
  'answers': {'text': ['Chimalistac'], 'answer_start': [363]}},
 {'id': 'qa-17939',
  'answers': {'

In [None]:
results = metric.compute(predictions=predictions, references=references)

In [None]:
metric = evaluate.load("squad_v2")
results = metric.compute(predictions=predictions, references=references)
print(results)

{'exact': 40.33673566727082, 'f1': 52.04827644347888, 'total': 20966, 'HasAns_exact': 58.15441783649876, 'HasAns_f1': 75.0512086370753, 'HasAns_total': 14532, 'NoAns_exact': 0.09325458501709667, 'NoAns_f1': 0.09325458501709667, 'NoAns_total': 6434, 'best_exact': 55.33244300295717, 'best_exact_thresh': 0.6415619850158691, 'best_f1': 64.63131456858878, 'best_f1_thresh': 0.7467862963676453}


In [None]:
print("📊 Evaluation Results:")
print(f"Exact Match (EM): {results['exact']:.2f}%")
print(f"F1 Score: {results['f1']:.2f}%")
print(f"Total Examples: {results['total']}")
print(f"Has Answer (EM): {results.get('HasAns_exact', 0):.2f}%")
print(f"No Answer (EM): {results.get('NoAns_exact', 0):.2f}%")


📊 Evaluation Results:
Exact Match (EM): 40.34%
F1 Score: 52.05%
Total Examples: 20966
Has Answer (EM): 58.15%
No Answer (EM): 0.09%
