# Entrenamiento con Simple Transformers del model Distill BERT

## Paso 1: Cargar datos en Google Colab

## Paso 2: Instalar librer√≠as

In [1]:
!pip install transformers evaluate torch --quiet
!pip install simpletransformers transformers datasets huggingface_hub scikit-learn
!pip install evaluate --quiet

[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m84.1/84.1 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting simpletransformers
  Downloading simpletransformers-0.70.5-py3-none-any.whl.metadata (43 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m43.3/43.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m43.6/43.6 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorboardx (from simpletransformers)
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Collecting streamlit (from 

## Paso 3: Cargar librer√≠as

In [2]:
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
import evaluate
import json
import os
import shutil
import requests


from tqdm import tqdm
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs
from sklearn.model_selection import train_test_split
from google.colab import files

In [3]:
import logging
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

In [4]:
import torch
print("GPU available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))

GPU available: True
GPU name: Tesla T4


## Paso 4: Cargar datos

In [5]:
# URLs of the files
urls = {
    "eval_colombia_mexico_dataset.json": "https://github.com/BlueAutomata/tesis-optimizacion-de-modelos-de-question-answering/raw/refs/heads/master/src/datasets/exploration_datasets/gold/eval_colombia_mexico_dataset.json",
    "train_colombia_mexico_dataset.json": "https://github.com/BlueAutomata/tesis-optimizacion-de-modelos-de-question-answering/raw/refs/heads/master/src/datasets/exploration_datasets/gold/train_colombia_mexico_dataset.json"
}

# Dictionary to store the loaded JSON data
datasets = {}

for filename, url in urls.items():
    # Download the file
    response = requests.get(url)
    if response.status_code == 200:
        # Save locally
        with open(filename, "wb") as f:
            f.write(response.content)
        # Load JSON into Python
        datasets[filename] = response.json()
        print(f"{filename} downloaded and loaded successfully!")
    else:
        print(f"Failed to download {filename}. Status code: {response.status_code}")

eval_colombia_mexico_dataset.json downloaded and loaded successfully!
train_colombia_mexico_dataset.json downloaded and loaded successfully!


In [6]:
with open("train_colombia_mexico_dataset.json", "r", encoding="utf-8") as f:
    train_dataset = json.load(f)

print(f"‚úÖ Loaded {len(train_dataset)} records successfully!")

‚úÖ Loaded 1 records successfully!


In [7]:
with open("eval_colombia_mexico_dataset.json", "r", encoding="utf-8") as f:
    eval_dataset = json.load(f)

print(f"‚úÖ Loaded {len(eval_dataset)} records successfully!")

‚úÖ Loaded 1 records successfully!


In [8]:
train_dataset = train_dataset["data"]

In [9]:
eval_dataset = eval_dataset["data"]

In [10]:
# üîß 1Ô∏è‚É£ Flatten your dataset so each row has 'context' and 'qas'
def flatten_squad(dataset):
    new_data = []
    for article in dataset:
        for para in article["paragraphs"]:
            new_data.append({
                "context": para["context"],
                "qas": para["qas"]
            })
    return new_data

In [11]:
train_data = flatten_squad(train_dataset)

In [12]:
eval_data = flatten_squad(eval_dataset)

In [13]:
print(f"‚úÖ Training samples: {len(train_data)}")
print(f"‚úÖ Eval samples: {len(eval_data)}")

‚úÖ Training samples: 4320
‚úÖ Eval samples: 3846


## Paso 5: Definir hiperpar√°metros

In [14]:
model_args = QuestionAnsweringArgs()

# ========================
# ‚öôÔ∏è Training configuration
# ========================
model_args.train_batch_size = 8
model_args.eval_batch_size = 8
model_args.num_train_epochs = 4                # slightly longer training for better convergence
model_args.learning_rate = 3e-5                # standard for BERT fine-tuning
model_args.gradient_accumulation_steps = 4     # effective batch size = 8 √ó 4 = 32
model_args.warmup_ratio = 0.1
model_args.weight_decay = 0.01                 # regularization to prevent overfitting
model_args.overwrite_output_dir = True

# ========================
# üß† Evaluation & Early Stopping
# ========================
model_args.evaluate_during_training = True
model_args.evaluate_during_training_steps = 500
model_args.evaluate_during_training_verbose = True
model_args.save_model_every_epoch = False
model_args.save_eval_checkpoints = True
model_args.save_best_model = True
model_args.metric_for_best_model = "f1"
model_args.greater_is_better = True
model_args.early_stopping_metric = "eval_loss"
model_args.early_stopping_patience = 2
model_args.early_stopping_consider_epochs = True

# ========================
# üìÑ Sequence & Context Settings
# ========================
model_args.max_seq_length = 384       # total sequence length
model_args.doc_stride = 128           # sliding window overlap
model_args.max_query_length = 64      # question length
model_args.max_answer_length = 30

# ========================
# üß© Output & Logging
# ========================
model_args.output_dir = "./outputs/"
model_args.best_model_dir = "./outputs/best_model/"
model_args.logging_steps = 100
model_args.manual_seed = 42

# ========================
# ‚ö° Resource Handling
# ========================
model_args.use_multiprocessing = False             # safer for notebooks
model_args.fp16 = torch.cuda.is_available()        # enable mixed precision if GPU available
model_args.dataloader_num_workers = 2              # small boost in performance
model_args.save_steps = -1                         # don't save intermediate steps

print("‚úÖ Model arguments configured successfully!")

‚úÖ Model arguments configured successfully!


## Paso 6: Cargar el modelo

In [15]:
model_original = QuestionAnsweringModel(
    model_type="bert",
    model_name="mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",  # BETO
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

The following layers were not sharded: bert.encoder.layer.*.output.dense.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.key.bias, qa_outputs.bias, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.LayerNorm.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.word_embeddings.weight, bert.enco

tokenizer_config.json:   0%|          | 0.00/135 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [16]:
model = QuestionAnsweringModel(
    model_type="bert",
    model_name="mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",  # BETO
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

The following layers were not sharded: bert.encoder.layer.*.output.dense.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.key.bias, qa_outputs.bias, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.LayerNorm.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.word_embeddings.weight, bert.enco

## Paso 7: Entrenamiento del modelo

In [17]:
model.train_model(train_data, eval_data=eval_data)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 19487/19487 [09:59<00:00, 32.52it/s]
add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 19487/19487 [00:00<00:00, 670069.45it/s]


Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 4:   0%|          | 0/13019 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:28<65:44:02, 28.34s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:30<03:58, 32.16it/s] [A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:37<02:12, 52.65it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:38<00:41, 134.48it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:50<00:41, 134.48it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:56<00:42, 98.97it/s] [A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [00:58<00:29, 119.06it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:05<00:23, 117.13it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:35<82:45:05, 35.67s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:37<04:51, 26.23it/s] [A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:42<00:51, 109.11it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:55<00:51, 109.11it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:55<00:38, 108.10it/s][A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [00:56<00:25, 134.62it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:08<00:27, 102.36it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:20<00:27, 102.36it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:23<54:04:16, 23.31s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:31<04:32, 28.05it/s] [A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:41<02:39, 43.71it/s][A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:53<02:39, 43.71it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:58<00:41, 100.01it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:07<00:23, 115.97it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:19<00:23, 115.97it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:22<00:00, 100.93it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:34<79:16:38, 34.18s/it][A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:39<02:26, 47.59it/s][A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:51<02:26, 47.59it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:56<00:41, 99.41it/s][A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [00:58<00:29, 118.87it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:05<00:24, 112.01it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:16<00:24, 112.01it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:19<00:00, 105.14it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/83

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:35<81:59:58, 35.35s/it][A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:36<02:10, 53.39it/s][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:38<01:16, 82.02it/s][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:53<01:16, 82.02it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:56<00:41, 101.19it/s][A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [01:00<00:30, 112.34it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:07<00:25, 111.28it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:18<00:25, 111.28it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:32<76:26:23, 32.95s/it][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:34<01:13, 84.82it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:42<01:04, 86.49it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:50<00:37, 112.10it/s][A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [01:00<00:35, 99.26it/s] [A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:06<00:27, 102.98it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:16<00:27, 102.98it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:16<00:00, 109.07it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/8352 [00:28<67:13:21, 28.98s/it][A
convert squad examples to features:   8%|‚ñä         | 697/8352 [00:30<03:55, 32.50it/s] [A
convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:38<02:15, 51.45it/s][A
convert squad examples to features:  17%|‚ñà‚ñã        | 1424/8352 [00:38<02:11, 52.73it/s][A
convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:39<00:35, 157.26it/s][A
convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:45<00:34, 141.45it/s][A
convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:55<00:39, 104.56it/s][A
convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:07<00:25, 110.11it/s][A
convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:20<00:25, 110.11it/s][A
convert squad examples to fea

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/13019 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:32<75:54:49, 32.73s/it][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:35<01:16, 82.08it/s][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:51<01:16, 82.08it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:51<01:28, 62.89it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:52<00:55, 88.31it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [01:01<00:48, 86.34it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:04<00:20, 139.14it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:14<00:20, 139.14it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:30<69:43:51, 30.06s/it][A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:39<02:30, 46.27it/s][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:40<01:24, 74.26it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:41<00:31, 153.88it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:55<00:31, 153.88it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [01:03<00:55, 74.63it/s] [A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:03<00:21, 131.13it/s][A[A

convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/8352 [01:04<00:06, 208.68it/s][A[A

convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/8

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:28<66:44:53, 28.77s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:37<05:12, 24.50it/s] [A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:42<00:38, 128.14it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:55<00:38, 128.14it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:57<00:45, 90.80it/s] [A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:07<00:25, 107.50it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:18<00:25, 107.50it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:22<00:00, 101.02it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 835

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:30<71:54:25, 31.00s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:34<04:39, 27.36it/s] [A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:36<01:05, 95.83it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:42<00:53, 104.87it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:58<00:44, 94.68it/s] [A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [01:01<00:31, 108.78it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:05<00:22, 123.34it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:18<00:00, 106.34it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [0

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:34<80:04:51, 34.52s/it][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:38<01:23, 75.42it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:40<00:36, 133.36it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:52<00:43, 96.95it/s] [A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [00:59<00:35, 98.82it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:07<00:29, 95.58it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:14<00:00, 111.69it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 428585.04it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:32<76:26:38, 32.95s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:34<04:31, 28.22it/s] [A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:39<00:47, 118.11it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:44<00:40, 121.38it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:57<00:40, 121.38it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:59<00:50, 82.41it/s] [A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:02<00:21, 128.97it/s][A[A

convert squad examples to features:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 6265/8352 [01:03<00:12, 161.95it/s][A[A

convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/83

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:27<62:57:26, 27.14s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:29<03:51, 33.06it/s] [A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:39<03:51, 33.06it/s][A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:41<02:38, 43.89it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:43<00:35, 138.18it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:59<00:35, 138.18it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [01:05<00:56, 73.92it/s] [A[A

convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/8352 [01:06<00:07, 175.30it/s][A[A

convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/8352 [01:19<00:07, 

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/8352 [00:23<54:47:52, 23.62s/it][A
convert squad examples to features:   8%|‚ñä         | 697/8352 [00:33<04:48, 26.51it/s] [A
convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:35<01:59, 58.47it/s][A
convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:39<01:15, 82.48it/s][A
convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:41<00:29, 167.42it/s][A
convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:53<00:29, 167.42it/s][A
convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:58<00:47, 87.27it/s] [A
convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [01:00<00:31, 112.06it/s][A
convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:10<00:29, 94.65it/s] [A
convert squad examples to f

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/13019 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:27<63:04:29, 27.19s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:28<03:43, 34.28it/s] [A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:39<02:30, 46.25it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:42<00:52, 106.80it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:55<00:52, 106.80it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:56<00:40, 103.21it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:05<00:23, 120.62it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:15<00:23, 120.62it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:19

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:36<83:38:51, 36.06s/it][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:39<00:56, 98.58it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:40<00:39, 124.16it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:50<00:41, 100.81it/s][A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [00:52<00:26, 130.10it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:04<00:28, 96.91it/s] [A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:04<00:28, 96.91it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:14<00:28, 96.91it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:31<73:21:22, 31.62s/it][A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:36<02:14, 51.89it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:48<00:49, 98.33it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [01:01<00:49, 98.33it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [01:03<00:53, 78.50it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:06<00:23, 117.24it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:21<00:23, 117.24it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:27<00:00, 95.92it/s] 


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:36<84:17:02, 36.33s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:38<05:03, 25.25it/s] [A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:40<00:32, 150.31it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:58<00:32, 150.31it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:59<00:47, 88.22it/s] [A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:07<00:25, 108.76it/s][A[A

convert squad examples to features:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 6265/8352 [01:08<00:15, 137.00it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:17<00:00, 108.01it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:25<59:54:08, 25.82s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:37<05:27, 23.38it/s] [A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:38<00:47, 116.45it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:49<00:50, 97.19it/s] [A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:57<00:45, 91.42it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:05<00:23, 117.46it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:17<00:23, 117.46it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:22<00:00, 101.16it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:24<56:53:56, 24.53s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:31<04:27, 28.63it/s] [A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:36<02:06, 55.17it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:38<00:44, 125.18it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:41<00:33, 146.03it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:52<00:33, 146.03it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:53<00:42, 99.22it/s] [A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [00:55<00:26, 131.30it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:05<00:26, 10

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/8352 [00:23<53:28:04, 23.05s/it][A
convert squad examples to features:   8%|‚ñä         | 697/8352 [00:29<04:13, 30.23it/s] [A
convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:34<01:58, 58.60it/s][A
convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:41<01:27, 71.73it/s][A
convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:42<00:32, 150.08it/s][A
convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:56<00:43, 96.32it/s] [A
convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [00:57<00:27, 124.97it/s][A
convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:05<00:24, 111.98it/s][A
convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/8352 [01:06<00:06, 200.29it/s][A
convert squad examp

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

Running Epoch 4 of 4:   0%|          | 0/13019 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:23<53:41:01, 23.14s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:34<05:08, 24.84it/s] [A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:38<02:15, 51.47it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:45<00:38, 125.85it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:50<00:32, 128.53it/s][A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [00:59<00:32, 108.20it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:05<00:24, 111.61it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:18<00:24, 111.61it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:32<75:59:00, 32.76s/it][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:40<01:00, 91.48it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:41<00:41, 118.68it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:52<00:41, 118.68it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:58<00:54, 76.50it/s] [A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:01<00:23, 120.91it/s][A[A

convert squad examples to features:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 6265/8352 [01:04<00:15, 136.26it/s][A[A

convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/8352 [01:05<00:07, 177.84it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:37<87:19:15, 37.64s/it][A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:40<02:23, 48.41it/s][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:42<01:23, 74.57it/s][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [01:00<01:23, 74.57it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [01:01<00:45, 92.24it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:07<00:23, 120.34it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:18<00:00, 106.52it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 415017.85it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:32<75:50:07, 32.69s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:35<04:36, 27.65it/s] [A[A

convert squad examples to features:   9%|‚ñä         | 728/8352 [00:35<04:23, 28.94it/s][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:40<01:02, 99.52it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:45<00:32, 151.97it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:55<00:32, 151.97it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:57<00:40, 103.87it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:06<00:23, 118.76it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:20<00:00, 

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:36<84:32:44, 36.45s/it][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:38<00:54, 102.29it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:43<00:44, 110.09it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:59<00:44, 110.09it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [01:03<01:01, 68.36it/s] [A[A

convert squad examples to features:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 4873/8352 [01:04<00:37, 91.66it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:07<00:25, 110.73it/s][A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:19<00:25, 110.73it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñ

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:27<63:30:11, 27.38s/it][A[A

convert squad examples to features:   8%|‚ñä         | 697/8352 [00:28<03:43, 34.31it/s] [A[A

convert squad examples to features:  17%|‚ñà‚ñã        | 1393/8352 [00:31<01:39, 69.85it/s][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:35<01:06, 94.53it/s][A[A

convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:36<00:39, 140.38it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:42<00:37, 131.62it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:53<00:37, 131.62it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [00:59<00:54, 76.42it/s] [A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:01<00:21, 131.47it/

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:34<80:38:05, 34.76s/it][A[A

convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:39<01:25, 73.01it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:45<00:44, 109.63it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3481/8352 [00:55<00:44, 109.63it/s][A[A

convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [01:00<00:51, 80.76it/s] [A[A

convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:04<00:23, 118.32it/s][A[A

convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/8352 [01:05<00:07, 181.76it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:20<00:00, 103.85it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/8352 [00:32<75:53:41, 32.72s/it][A
convert squad examples to features:  25%|‚ñà‚ñà‚ñå       | 2089/8352 [00:38<01:27, 71.82it/s][A
convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:43<01:03, 87.16it/s][A
convert squad examples to features:  33%|‚ñà‚ñà‚ñà‚ñé      | 2785/8352 [00:58<01:03, 87.16it/s][A
convert squad examples to features:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4177/8352 [01:02<00:52, 79.24it/s][A
convert squad examples to features:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 5569/8352 [01:07<00:25, 111.11it/s][A
convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/8352 [01:08<00:08, 171.09it/s][A
convert squad examples to features:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 6961/8352 [01:18<00:08, 171.09it/s][A
convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:28<00:00, 94.34it/s

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

(13016,
 {'global_step': [500,
   1000,
   1500,
   2000,
   2500,
   3000,
   3254,
   3500,
   4000,
   4500,
   5000,
   5500,
   6000,
   6500,
   6508,
   7000,
   7500,
   8000,
   8500,
   9000,
   9500,
   9762,
   10000,
   10500,
   11000,
   11500,
   12000,
   12500,
   13000,
   13016],
  'correct': [5192,
   5463,
   5526,
   5609,
   5640,
   5707,
   5637,
   5561,
   5679,
   5686,
   5694,
   5658,
   5713,
   5764,
   5750,
   5650,
   5709,
   5674,
   5708,
   5697,
   5730,
   5686,
   5703,
   5722,
   5691,
   5717,
   5727,
   5703,
   5725,
   5726],
  'similar': [2712,
   2464,
   2389,
   2209,
   2140,
   2215,
   2331,
   2329,
   2159,
   2172,
   2249,
   2215,
   2228,
   2100,
   2121,
   2199,
   2131,
   2226,
   2108,
   2162,
   2141,
   2202,
   2182,
   2175,
   2146,
   2145,
   2140,
   2180,
   2129,
   2128],
  'incorrect': [448,
   425,
   437,
   534,
   572,
   430,
   384,
   462,
   514,
   494,
   409,
   479,
   411,
   488,
   481,
  

## Paso 8: Evaluaci√≥n de los resultados

In [18]:
result_original, texts_original = model_original.eval_model(eval_data)
print("üìä Evaluation results:")
print(result_original)

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:34<00:00, 88.32it/s] 
add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 392511.06it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

üìä Evaluation results:
{'correct': 4231, 'similar': 3309, 'incorrect': 812, 'eval_loss': -1.3638708443096508}


In [19]:
result, texts = model.eval_model(eval_data)
print("üìä Evaluation results:")
print(result)

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:37<00:00, 85.63it/s]
add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 395261.34it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

üìä Evaluation results:
{'correct': 5726, 'similar': 2128, 'incorrect': 498, 'eval_loss': -8.722951621291607}


In [20]:
correct = result_original['correct']
similar = result_original['similar']
incorrect = result_original['incorrect']
total = correct + similar + incorrect

# 1Ô∏è‚É£ Exact Match Accuracy
exact_match = correct / total

# 2Ô∏è‚É£ Weighted Accuracy (partial credit for 'similar')
weighted_accuracy = (correct + 0.5 * similar) / total

# 3Ô∏è‚É£ F1 Score approximation
TP = correct + 0.5 * similar
FN = 0.5 * similar + incorrect
# Assuming FP = 0 (as Simple Transformers counts predictions, not negatives)
precision = TP / TP
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)

# Print results
print(f"Exact Match (EM): {exact_match:.4f} ‚Üí {exact_match*100:.2f}%")
print(f"Weighted Accuracy: {weighted_accuracy:.4f} ‚Üí {weighted_accuracy*100:.2f}%")
print(f"F1 Score: {f1_score:.4f} ‚Üí {f1_score*100:.2f}%")

Exact Match (EM): 0.5066 ‚Üí 50.66%
Weighted Accuracy: 0.7047 ‚Üí 70.47%
F1 Score: 0.8268 ‚Üí 82.68%


In [21]:
correct = result['correct']
similar = result['similar']
incorrect = result['incorrect']
total = correct + similar + incorrect

# 1Ô∏è‚É£ Exact Match Accuracy
exact_match = correct / total

# 2Ô∏è‚É£ Weighted Accuracy (partial credit for 'similar')
weighted_accuracy = (correct + 0.5 * similar) / total

# 3Ô∏è‚É£ F1 Score approximation
TP = correct + 0.5 * similar
FN = 0.5 * similar + incorrect
# Assuming FP = 0 (as Simple Transformers counts predictions, not negatives)
precision = TP / TP
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)

# Print results
print(f"Exact Match (EM): {exact_match:.4f} ‚Üí {exact_match*100:.2f}%")
print(f"Weighted Accuracy: {weighted_accuracy:.4f} ‚Üí {weighted_accuracy*100:.2f}%")
print(f"F1 Score: {f1_score:.4f} ‚Üí {f1_score*100:.2f}%")

Exact Match (EM): 0.6856 ‚Üí 68.56%
Weighted Accuracy: 0.8130 ‚Üí 81.30%
F1 Score: 0.8968 ‚Üí 89.68%


## Paso 9: Guardar los resultados

In [42]:
# Folder to save
local_path = "./QA_model_distill-bert2"
os.makedirs(local_path, exist_ok=True)

# Save the Hugging Face model & tokenizer directly
model.model.save_pretrained(local_path)       # Saves weights + config
model.tokenizer.save_pretrained(local_path)   # Saves vocab + tokenizer config

# Check files
!ls -l ./QA_model

ls: cannot access './QA_model': No such file or directory


In [43]:
shutil.make_archive("QA_model_distill-bert2", 'zip', local_path)
print("‚úÖ Zipped model")
!ls -lh QA_model.zip

‚úÖ Zipped model
ls: cannot access 'QA_model.zip': No such file or directory


In [44]:
files.download("QA_model_distill-bert2.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [66]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [39]:
from huggingface_hub import login

# This will open a prompt for your Hugging Face token
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv‚Ä¶

In [40]:
from huggingface_hub import whoami
print(whoami())

{'type': 'user', 'id': '6682cc422b6af3f60a185123', 'name': 'BlueAutomata', 'fullname': 'Guillermo Luigui Ubaldo Nieto Angarita', 'email': 'guillermo.luigui.nieto@gmail.com', 'emailVerified': True, 'canPay': False, 'periodEnd': 1764547199, 'isPro': False, 'avatarUrl': '/avatars/b6cf26e7fac6e034fc5f2b2b87f9ff70.svg', 'orgs': [], 'auth': {'type': 'access_token', 'accessToken': {'displayName': 'write_token', 'role': 'write', 'createdAt': '2025-10-26T19:35:35.613Z'}}}


In [41]:

from huggingface_hub import login, create_repo
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# 1Ô∏è‚É£ Define your paths and repo name
model_dir = "./outputs"
repo_id = "BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico"

# 2Ô∏è‚É£ Create the repo (won‚Äôt fail if it already exists)
create_repo(repo_id, private=False, exist_ok=True)

# 3Ô∏è‚É£ Load your SimpleTransformers model as a standard HF model
hf_model = AutoModelForQuestionAnswering.from_pretrained(model_dir)
hf_tokenizer = AutoTokenizer.from_pretrained(model_dir)

# 4Ô∏è‚É£ Push to the Hugging Face Hub
hf_model.push_to_hub(
    repo_id,
    description="DistilBERT-base Spanish WWM cased model fine-tuned for extractive QA on news articles from Colombia and Mexico.",
    tags=[
        "spanish",
        "question-answering",
        "extractive-qa",
        "distilbert",
        "bert",
        "wwm-cased",
        "colombia",
        "mexico",
        "news-dataset",
        "fine-tuned-model"
    ]
)

hf_tokenizer.push_to_hub(repo_id)

The following layers were not sharded: bert.encoder.layer.*.output.dense.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.key.bias, qa_outputs.bias, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.LayerNorm.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.word_embeddings.weight, bert.enco

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...e9cqegd/model.safetensors:  10%|9         | 41.9MB /  437MB            

No files have been modified since last commit. Skipping to prevent empty commit.
No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico/commit/bd8af2f2ee2cb9253a637639b5daa70360fb1002', commit_message='Upload tokenizer', commit_description='', oid='bd8af2f2ee2cb9253a637639b5daa70360fb1002', pr_url=None, repo_url=RepoUrl('https://huggingface.co/BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico', endpoint='https://huggingface.co', repo_type='model', repo_id='BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico'), pr_revision=None, pr_num=None)

In [46]:

shutil.move("QA_model_distill-bert2.zip", "/content/drive/MyDrive/Thesis_QA_Optimization/Model")

'/content/drive/MyDrive/Thesis_QA_Optimization/Model/QA_model_distill-bert2.zip'

In [47]:
# Path to the folder containing the saved model
model_path = "./QA_model_distill-bert"  # change if different

# Reload the model
my_model = QuestionAnsweringModel(
    "bert",
    model_path,
    use_cuda=True  # set to False if no GPU
)

The following layers were not sharded: bert.encoder.layer.*.output.dense.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.key.bias, qa_outputs.bias, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.LayerNorm.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.word_embeddings.weight, bert.enco

In [48]:
# Context & question
context = "Ciudad de M√©xico. El capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez, fue hallado muerto en Tamaulipas."
question = "¬øQui√©n fue hallado muerto en Tamaulipas?"

# Prepare input in SimpleTransformers format
to_predict = [
    {
        "context": context,
        "qas": [
            {
                "id": "0",
                "question": question,
                "answers": [{"text": " ", "answer_start": 0}],
                "is_impossible": False
            }
        ]
    }
]

# Run prediction
answers = my_model.predict(to_predict)
print(answers)

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 256.99it/s]
add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 8422.30it/s]


Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

([{'id': '0', 'answer': ['Abraham Jerem√≠as P√©rez Ram√≠rez,', 'Ram√≠rez', 'Abraham', 'Abraham Jerem√≠as P√©rez', 'Jerem√≠as P√©rez Ram√≠rez,', 'Abraham Jerem√≠as', 'P√©rez Ram√≠rez', 'El capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'Abraham Jerem√≠as P√©rez Ram√≠rez, fue hallado muerto en Tamaulipas.', 'Ciudad de M√©xico. El capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'M√©xico. El capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', '', 'Ram√≠rez,', 'Jerem√≠as P√©rez', 'Jerem√≠as', 'P√©rez', 'El capit√°n de la Secretar√≠a de Marina, Abraham']}], [{'id': '0', 'probability': [0.9999723004715582, 1.3360977435901276e-05, 7.148124721214904e-06, 2.2779942891231264e-06, 1.8592447521830305e-06, 1.3287423030760395e-06, 9.759334396760402e-07, 2.8290556150848015e-07, 1.7982571593526489e-07, 1.2

  with amp.autocast():


In [34]:
# Load your fine-tuned model from the Hub
qa = pipeline(
    "question-answering",
    model="BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico",
    tokenizer="BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico"
)

# Provide Spanish context
contexto = """
El presidente Gustavo Petro anunci√≥ nuevas medidas para impulsar el uso de energ√≠as renovables en Colombia,
especialmente en la regi√≥n del Caribe, donde los proyectos solares y e√≥licos han ganado protagonismo.
El objetivo del gobierno es reducir las emisiones de carbono en un 30% para el a√±o 2030.
"""

# Ask questions in Spanish
preguntas = [
    "¬øQui√©n anunci√≥ nuevas medidas para energ√≠as renovables?",
    "¬øEn qu√© regi√≥n se impulsar√°n los proyectos solares y e√≥licos?",
    "¬øCu√°l es el objetivo del gobierno para 2030?"
]

# Evaluate each question
for pregunta in preguntas:
    respuesta = qa(question=pregunta, context=contexto)
    print(f"‚ùì {pregunta}\nüí¨ {respuesta['answer']}\n")

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/437M [00:00<?, ?B/s]

The following layers were not sharded: bert.encoder.layer.*.output.dense.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.key.bias, qa_outputs.bias, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.LayerNorm.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.word_embeddings.weight, bert.enco

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

Device set to use cuda:0


‚ùì ¬øQui√©n anunci√≥ nuevas medidas para energ√≠as renovables?
üí¨ Gustavo Petro

‚ùì ¬øEn qu√© regi√≥n se impulsar√°n los proyectos solares y e√≥licos?
üí¨ Caribe

‚ùì ¬øCu√°l es el objetivo del gobierno para 2030?
üí¨ reducir las emisiones de carbono en un 30%



In [35]:
def flatten_squad(dataset):
    # If the dataset is a dict with "data", extract it
    if isinstance(dataset, dict) and "data" in dataset:
        dataset = dataset["data"]

    new_data = []
    for article in dataset:
        for para in article["paragraphs"]:
            new_data.append({
                "title": article.get("title", ""),
                "context": para["context"],
                "qas": para["qas"]
            })
    return new_data

In [37]:
flat_eval = flatten_squad(eval_dataset)

In [38]:
metric = evaluate.load("squad")

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [50]:
from transformers import pipeline
import evaluate

qa_pipeline = pipeline(
    "question-answering",
    model="BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico",
    tokenizer="BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico"
)



The following layers were not sharded: bert.encoder.layer.*.output.dense.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.key.bias, qa_outputs.bias, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.LayerNorm.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.word_embeddings.weight, bert.enco

In [49]:
from transformers import pipeline
import evaluate

qa_pipeline_original = pipeline(
    "question-answering",
    model="mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",
    tokenizer="mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
)


The following layers were not sharded: bert.encoder.layer.*.output.dense.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.key.bias, qa_outputs.bias, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.LayerNorm.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.word_embeddings.weight, bert.enco

In [51]:
predictions_original = []
references_original = []

for ex in flat_eval:
    context = ex["context"]
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            continue  # Skip unanswerable questions

        # Run QA prediction
        pred = qa_pipeline_original(question=qa["question"], context=context)

        # Collect prediction and reference
        predictions_original.append({
            "id": qa["id"],
            "prediction_text": pred["answer"]
        })

        references_original.append({
            "id": qa["id"],
            "answers": {
                "text": [a["text"] for a in qa["answers"]],
                "answer_start": [a["answer_start"] for a in qa["answers"]]
            }
        })

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [52]:
predictions = []
references = []

for ex in flat_eval:
    context = ex["context"]
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            continue  # Skip unanswerable questions

        # Run QA prediction
        pred = qa_pipeline(question=qa["question"], context=context)

        # Collect prediction and reference
        predictions.append({
            "id": qa["id"],
            "prediction_text": pred["answer"]
        })

        references.append({
            "id": qa["id"],
            "answers": {
                "text": [a["text"] for a in qa["answers"]],
                "answer_start": [a["answer_start"] for a in qa["answers"]]
            }
        })


In [53]:
results = metric.compute(predictions=predictions_original, references=references_original)
print("üìä Evaluation results:")
print(f"Exact Match: {results['exact_match']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

üìä Evaluation results:
Exact Match: 59.24
F1 Score: 74.51


In [54]:
results = metric.compute(predictions=predictions, references=references)
print("üìä Evaluation results:")
print(f"Exact Match: {results['exact_match']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

üìä Evaluation results:
Exact Match: 75.10
F1 Score: 83.60


In [55]:
import evaluate
metric = evaluate.load("squad_v2")

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [None]:
from tqdm import tqdm

predictions_original = []
references_original = []

for ex in tqdm(flat_eval):
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            # Questions that have no valid answer in the text
            predictions_original.append({
                "id": qa["id"],
                "prediction_text": "",
                "no_answer_probability": 1.0   # fully confident it's unanswerable
            })
            references_original.append({
                "id": qa["id"],
                "answers": {"text": [], "answer_start": []}
            })
        else:
            # Normal (answerable) questions
            pred = qa_pipeline_original(question=qa["question"], context=ex["context"])

            predictions_original.append({
                "id": qa["id"],
                "prediction_text": pred["answer"],
                # Use model confidence inversely as no-answer probability
                "no_answer_probability": 1.0 - pred.get("score", 0.0)
            })

            references_original.append({
                "id": qa["id"],
                "answers": {
                    "text": [a["text"] for a in qa["answers"]],
                    "answer_start": [a["answer_start"] for a in qa["answers"]]
                }
            })

In [56]:
from tqdm import tqdm

predictions = []
references = []

for ex in tqdm(flat_eval):
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            # Questions that have no valid answer in the text
            predictions.append({
                "id": qa["id"],
                "prediction_text": "",
                "no_answer_probability": 1.0   # fully confident it's unanswerable
            })
            references.append({
                "id": qa["id"],
                "answers": {"text": [], "answer_start": []}
            })
        else:
            # Normal (answerable) questions
            pred = qa_pipeline(question=qa["question"], context=ex["context"])

            predictions.append({
                "id": qa["id"],
                "prediction_text": pred["answer"],
                # Use model confidence inversely as no-answer probability
                "no_answer_probability": 1.0 - pred.get("score", 0.0)
            })

            references.append({
                "id": qa["id"],
                "answers": {
                    "text": [a["text"] for a in qa["answers"]],
                    "answer_start": [a["answer_start"] for a in qa["answers"]]
                }
            })

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3846/3846 [07:59<00:00,  8.01it/s]


In [57]:
predictions_original

[{'id': 'qa-20530', 'prediction_text': 'Blue Velvet Revisited'},
 {'id': 'qa-20528', 'prediction_text': 'Tuxedomoon'},
 {'id': 'qa-27430', 'prediction_text': '(National Poison Database System'},
 {'id': 'qa-27428', 'prediction_text': 'difenhidramina'},
 {'id': 'qa-21633', 'prediction_text': 'Tren Maya'},
 {'id': 'qa-20670', 'prediction_text': 'Citlali'},
 {'id': 'qa-21307', 'prediction_text': 'Argentina'},
 {'id': 'qa-21250', 'prediction_text': 'adicciones'},
 {'id': 'qa-22771', 'prediction_text': 'presidenta Claudia Sheinbaum Pardo'},
 {'id': 'qa-22775', 'prediction_text': '(15 mil 645 millones de d√≥lares)'},
 {'id': 'qa-6164',
  'prediction_text': 'secretaria de Seguridad Nacional de Estados Unidos'},
 {'id': 'qa-25169', 'prediction_text': 'Guerreros Buscadores'},
 {'id': 'qa-25882',
  'prediction_text': 'una nueva sentencia con agravantes contra el feminicidio'},
 {'id': 'qa-23223', 'prediction_text': 'l√≠deres √°rabes'},
 {'id': 'qa-25089',
  'prediction_text': 'presidente Andr√©s

In [59]:
results = metric.compute(predictions=predictions_original, references=references_original)

print("üìä Evaluation results:")
print(f"Exact Match: {results['exact']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

KeyError: 'no_answer_probability'

In [60]:
results = metric.compute(predictions=predictions, references=references)
print("üìä Evaluation results:")
print(f"Exact Match: {results['exact']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

üìä Evaluation results:
Exact Match: 82.79
F1 Score: 88.67


In [61]:
!mv outputs outputs_distill-bert2
!zip -r outputs_distill-bert2.zip outputs_distill-bert2

  adding: outputs_distill-bert2/ (stored 0%)
  adding: outputs_distill-bert2/checkpoint-10500/ (stored 0%)
  adding: outputs_distill-bert2/checkpoint-10500/config.json (deflated 49%)
  adding: outputs_distill-bert2/checkpoint-10500/optimizer.pt (deflated 10%)
  adding: outputs_distill-bert2/checkpoint-10500/model.safetensors (deflated 7%)
  adding: outputs_distill-bert2/checkpoint-10500/training_args.bin (deflated 53%)
  adding: outputs_distill-bert2/checkpoint-10500/special_tokens_map.json (deflated 42%)
  adding: outputs_distill-bert2/checkpoint-10500/vocab.txt (deflated 54%)
  adding: outputs_distill-bert2/checkpoint-10500/model_args.json (deflated 60%)
  adding: outputs_distill-bert2/checkpoint-10500/scheduler.pt (deflated 61%)
  adding: outputs_distill-bert2/checkpoint-10500/tokenizer_config.json (deflated 74%)
  adding: outputs_distill-bert2/checkpoint-10500/eval_results.txt (deflated 10%)
  adding: outputs_distill-bert2/checkpoint-12000/ (stored 0%)
  adding: outputs_distill-ber

In [63]:
shutil.move("outputs_distill-bert2.zip", "/content/drive/MyDrive/Thesis_QA_Optimization/Model")

'/content/drive/MyDrive/Thesis_QA_Optimization/Model/outputs_distill-bert2.zip'