# Entrenamiento con Simple Transformers del model Distill BERT

## Paso 1: Cargar datos en Google Colab

## Paso 2: Instalar librerías

In [1]:
!pip install transformers evaluate torch --quiet
!pip install simpletransformers transformers datasets huggingface_hub scikit-learn
!pip install evaluate --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting simpletransformers
  Downloading simpletransformers-0.70.5-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.3/43.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorboardx (from simpletransformers)
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Collecting streamlit (from simpletransformers)
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit->simpletransformers)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading simpletra

## Paso 3: Cargar librerías

In [2]:
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
import evaluate
import json
import os
import shutil
import requests


from tqdm import tqdm
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs
from sklearn.model_selection import train_test_split
from google.colab import files

In [3]:
import logging
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

In [4]:
import torch
print("GPU available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))

GPU available: True
GPU name: NVIDIA A100-SXM4-80GB


## Paso 4: Cargar datos

In [5]:
# URLs of the files
urls = {
    "eval_colombia_mexico_dataset.json": "https://github.com/BlueAutomata/tesis-optimizacion-de-modelos-de-question-answering/raw/refs/heads/master/src/datasets/exploration_datasets/gold/eval_colombia_mexico_dataset.json",
    "train_colombia_mexico_dataset.json": "https://github.com/BlueAutomata/tesis-optimizacion-de-modelos-de-question-answering/raw/refs/heads/master/src/datasets/exploration_datasets/gold/train_colombia_mexico_dataset.json"
}

# Dictionary to store the loaded JSON data
datasets = {}

for filename, url in urls.items():
    # Download the file
    response = requests.get(url)
    if response.status_code == 200:
        # Save locally
        with open(filename, "wb") as f:
            f.write(response.content)
        # Load JSON into Python
        datasets[filename] = response.json()
        print(f"{filename} downloaded and loaded successfully!")
    else:
        print(f"Failed to download {filename}. Status code: {response.status_code}")

eval_colombia_mexico_dataset.json downloaded and loaded successfully!
train_colombia_mexico_dataset.json downloaded and loaded successfully!


In [6]:
with open("train_colombia_mexico_dataset.json", "r", encoding="utf-8") as f:
    train_dataset = json.load(f)

print(f"✅ Loaded {len(train_dataset)} records successfully!")

✅ Loaded 1 records successfully!


In [7]:
with open("eval_colombia_mexico_dataset.json", "r", encoding="utf-8") as f:
    eval_dataset = json.load(f)

print(f"✅ Loaded {len(eval_dataset)} records successfully!")

✅ Loaded 1 records successfully!


In [8]:
train_dataset = train_dataset["data"]

In [9]:
eval_dataset = eval_dataset["data"]

In [10]:
# 🔧 1️⃣ Flatten your dataset so each row has 'context' and 'qas'
def flatten_squad(dataset):
    new_data = []
    for article in dataset:
        for para in article["paragraphs"]:
            new_data.append({
                "context": para["context"],
                "qas": para["qas"]
            })
    return new_data

In [11]:
train_data = flatten_squad(train_dataset)

In [12]:
eval_data = flatten_squad(eval_dataset)

In [13]:
print(f"✅ Training samples: {len(train_data)}")
print(f"✅ Eval samples: {len(eval_data)}")

✅ Training samples: 3235
✅ Eval samples: 2845


## Paso 5: Definir hiperparámetros

In [14]:
model_args = QuestionAnsweringArgs()

# Training behavior
model_args.train_batch_size = 8
model_args.eval_batch_size = 8
model_args.num_train_epochs = 2
model_args.learning_rate = 5e-6
model_args.gradient_accumulation_steps = 1
model_args.overwrite_output_dir = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_steps = 500
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = False
model_args.save_steps = -1
model_args.best_model_dir = "./outputs/best_model/"
model_args.output_dir = "./outputs/"

# Optimization
model_args.max_seq_length = 384
model_args.doc_stride = 128
model_args.warmup_ratio = 0.1
model_args.max_answer_length = 30

# Logging
model_args.logging_steps = 100
model_args.evaluate_during_training_verbose = True
model_args.manual_seed = 42

# 🔹 Sliding window parameters
model_args.max_seq_length = 384          # maximum total input sequence length after tokenization
model_args.doc_stride = 128              # overlap between two sliding windows
model_args.max_query_length = 64         # maximum length of the question

# Resource handling
model_args.use_multiprocessing = False  # safer for notebooks
model_args.fp16 = torch.cuda.is_available()  # use mixed precision if CUDA available

## Paso 6: Cargar el modelo

In [15]:
model_original = QuestionAnsweringModel(
    model_type="bert",
    model_name="mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",  # BETO
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

The following layers were not sharded: bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.query.weight, bert.encoder.layer.*.attention.self.query.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.value.bias, bert.embeddings.token_type_embeddings.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, qa_outputs.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.weight, bert.enco

tokenizer_config.json:   0%|          | 0.00/135 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [16]:
model = QuestionAnsweringModel(
    model_type="bert",
    model_name="mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",  # BETO
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

The following layers were not sharded: bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.query.weight, bert.encoder.layer.*.attention.self.query.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.value.bias, bert.embeddings.token_type_embeddings.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, qa_outputs.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.weight, bert.enco

## Paso 7: Entrenamiento del modelo

In [17]:
model.train_model(train_data, eval_data=eval_data)

100%|██████████| 14613/14613 [07:15<00:00, 33.54it/s]
add example index and unique id: 100%|██████████| 14613/14613 [00:00<00:00, 687431.18it/s]


Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 2:   0%|          | 0/8698 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<43:36:38, 25.07s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:27<01:02, 75.61it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:32, 115.12it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 219.13it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 154.89it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 424912.26it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:26<45:45:15, 26.30s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:27<01:01, 77.44it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:28<00:27, 135.13it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:06, 253.49it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:35<00:05, 228.74it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 153.41it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 395170.00it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<47:04:43, 27.07s/it][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:29<00:32, 115.95it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:07, 225.47it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:06, 187.19it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 157.00it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 440864.75it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:19<33:57:17, 19.52s/it][A[A

convert squad examples to features:   8%|▊         | 501/6263 [00:20<02:46, 34.69it/s] [A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:51, 47.02it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:30<00:26, 141.30it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:30<00:05, 322.19it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:05, 210.86it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 152.73it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 432973.35it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:23<40:31:16, 23.30s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:26<01:44, 50.58it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:28<01:01, 77.12it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:30<00:27, 138.73it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:05, 313.49it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:35<00:04, 264.62it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 159.23it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 435334.03it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<46:58:12, 27.00s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:42, 51.56it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:28<00:56, 83.87it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:27, 137.15it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:05, 336.34it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:32<00:03, 366.04it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 149.01it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 434901.59it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:23<40:09:12, 23.08s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:24<01:30, 58.26it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:28<01:06, 71.37it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:29, 129.36it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:04, 256.70it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 155.03it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 437903.01it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<45:03:55, 25.91s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:27<01:02, 75.76it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:31, 118.23it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 219.97it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 157.56it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 314918.49it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:26<45:19:34, 26.06s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:26<00:59, 80.58it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:30<00:30, 122.27it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 221.26it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 149.54it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 419752.10it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<47:12:01, 27.14s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:47, 49.02it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:05, 72.27it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:38<00:05, 224.66it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 154.33it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 425648.97it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:20<36:24:44, 20.93s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:28<01:10, 67.78it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:35, 106.24it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:33<00:07, 226.19it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:34<00:04, 264.82it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 148.66it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 421368.03it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<48:18:44, 27.77s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:31<01:12, 65.32it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:33<00:32, 114.70it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:33<00:04, 294.97it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 149.39it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 415562.08it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<44:55:15, 25.82s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:32<01:18, 60.61it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:32, 115.64it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 243.66it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 158.50it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 396661.77it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:19<33:44:19, 19.40s/it][A[A

convert squad examples to features:   8%|▊         | 501/6263 [00:20<02:51, 33.65it/s] [A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:09, 68.31it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:07, 247.34it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:34<00:05, 240.31it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 150.85it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 417046.52it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:20<36:26:33, 20.95s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:22<01:24, 62.50it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:13, 64.54it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:30<00:06, 269.24it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:06, 209.81it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 151.60it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 358752.39it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<45:12:11, 25.99s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:09, 68.52it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:07, 235.43it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:33<00:05, 231.03it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 149.21it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 421652.10it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:26<45:32:37, 26.18s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:49, 47.87it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:31<01:07, 70.89it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:06, 286.77it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:34<00:04, 294.66it/s][A[A

convert squad examples to features:  88%|████████▊ | 5501/6263 [00:38<00:03, 229.50it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 157.05it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 344460.81it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/6263 [00:19<34:42:52, 19.96s/it][A
convert squad examples to features:   8%|▊         | 501/6263 [00:21<02:54, 33.01it/s] [A
convert squad examples to features:  16%|█▌        | 1001/6263 [00:23<01:22, 63.64it/s][A
convert squad examples to features:  24%|██▍       | 1501/6263 [00:28<00:59, 80.55it/s][A
convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:27, 138.98it/s][A
convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:04, 267.17it/s][A
convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 151.97it/s]

add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 420336.44it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

Running Epoch 2 of 2:   0%|          | 0/8698 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<47:19:48, 27.21s/it][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:29<00:32, 114.96it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:07, 224.80it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:39<00:07, 160.54it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 155.91it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 406727.87it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:28<49:57:45, 28.72s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:30<01:54, 45.84it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:33<00:30, 122.91it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:39<00:05, 215.43it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 150.60it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 407763.28it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:26<46:16:17, 26.60s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:44, 50.35it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:31<01:09, 68.81it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:05, 299.33it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:05, 219.76it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:38<00:00, 162.08it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 409645.48it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<48:05:04, 27.64s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:47, 48.72it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:04, 74.26it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:24, 156.38it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:04, 378.70it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:32<00:03, 416.01it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 147.18it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 427568.05it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:26<45:54:55, 26.40s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:10, 67.46it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 211.30it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:43<00:00, 144.48it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 391535.89it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<43:56:41, 25.26s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:49, 48.16it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:30, 123.91it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:32<00:04, 294.73it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 157.04it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 401260.59it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<48:19:53, 27.79s/it][A[A

convert squad examples to features:  32%|███▏      | 2001/6263 [00:28<00:42, 100.89it/s][A[A

convert squad examples to features:  64%|██████▍   | 4001/6263 [00:28<00:09, 238.85it/s][A[A

convert squad examples to features:  73%|███████▎  | 4549/6263 [00:29<00:06, 276.55it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:38<00:08, 153.13it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 152.73it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 422072.14it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:24<43:02:34, 24.75s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:25<01:33, 55.98it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:28<01:02, 76.77it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:28, 129.72it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:05, 246.80it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 149.74it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 392050.12it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:22<39:53:35, 22.93s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:49, 48.19it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:06, 71.10it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:06, 270.11it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:34<00:04, 270.93it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 159.31it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 393265.06it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:24<42:50:10, 24.63s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:25<01:37, 54.06it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:06, 71.32it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:30<00:25, 149.92it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:05, 244.98it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 150.21it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 409479.45it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:26<46:40:02, 26.83s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:42, 51.54it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:29, 128.18it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:05, 220.82it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 153.08it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 394795.85it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:29<51:18:09, 29.49s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:32<01:14, 63.52it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:39<00:06, 190.00it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:44<00:00, 140.63it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 392337.03it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<44:51:38, 25.79s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:47, 48.92it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:04, 73.65it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:05, 299.19it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:35<00:05, 243.68it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 154.72it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 425724.85it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<47:51:17, 27.51s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:46, 49.22it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<00:58, 81.86it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:28, 133.20it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:34<00:03, 325.48it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 152.95it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 404735.08it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:21<37:03:37, 21.31s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:30<02:06, 41.48it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:31<01:11, 66.96it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:26, 141.55it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:04, 259.41it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 146.32it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 386080.63it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:28<49:20:03, 28.36s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:09, 68.81it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:31, 117.57it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:38<00:00, 162.15it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 397405.88it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:26<46:31:38, 26.75s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:47, 48.76it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:00, 79.16it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:05, 227.82it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 145.99it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 417849.20it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/6263 [00:24<42:52:49, 24.65s/it][A
convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:46, 49.28it/s][A
convert squad examples to features:  24%|██▍       | 1501/6263 [00:31<01:10, 67.82it/s][A
convert squad examples to features:  72%|███████▏  | 4501/6263 [00:33<00:06, 260.02it/s][A
convert squad examples to features:  80%|███████▉  | 5001/6263 [00:33<00:04, 297.72it/s][A
convert squad examples to features: 100%|██████████| 6263/6263 [00:43<00:00, 144.18it/s]

add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 388685.58it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

(17396,
 {'global_step': [500,
   1000,
   1500,
   2000,
   2500,
   3000,
   3500,
   4000,
   4500,
   5000,
   5500,
   6000,
   6500,
   7000,
   7500,
   8000,
   8500,
   8698,
   9000,
   9500,
   10000,
   10500,
   11000,
   11500,
   12000,
   12500,
   13000,
   13500,
   14000,
   14500,
   15000,
   15500,
   16000,
   16500,
   17000,
   17396],
  'correct': [3289,
   3493,
   3723,
   3908,
   3945,
   3951,
   4032,
   4054,
   4060,
   4122,
   4104,
   4155,
   4109,
   4182,
   4182,
   4218,
   4215,
   4228,
   4241,
   4244,
   4242,
   4220,
   4218,
   4225,
   4241,
   4240,
   4232,
   4252,
   4257,
   4268,
   4285,
   4289,
   4287,
   4279,
   4279,
   4286],
  'similar': [2648,
   2474,
   2226,
   1984,
   1976,
   2010,
   1930,
   1861,
   1820,
   1801,
   1858,
   1761,
   1872,
   1756,
   1777,
   1745,
   1737,
   1730,
   1703,
   1687,
   1689,
   1745,
   1701,
   1707,
   1677,
   1686,
   1707,
   1670,
   1690,
   1677,
   1654,
   1655,
  

## Paso 8: Evaluación de los resultados

In [18]:
result_original, texts_original = model_original.eval_model(eval_data)
print("📊 Evaluation results:")
print(result_original)

convert squad examples to features: 100%|██████████| 6263/6263 [00:46<00:00, 134.86it/s]
add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 392712.41it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

📊 Evaluation results:
{'correct': 3005, 'similar': 2646, 'incorrect': 612, 'eval_loss': -1.367361729097682}


In [19]:
result, texts = model.eval_model(eval_data)
print("📊 Evaluation results:")
print(result)

convert squad examples to features: 100%|██████████| 6263/6263 [00:45<00:00, 139.16it/s]
add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 404516.95it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

📊 Evaluation results:
{'correct': 4286, 'similar': 1650, 'incorrect': 327, 'eval_loss': -7.262935637417218}


In [20]:
correct = result_original['correct']
similar = result_original['similar']
incorrect = result_original['incorrect']
total = correct + similar + incorrect

# 1️⃣ Exact Match Accuracy
exact_match = correct / total

# 2️⃣ Weighted Accuracy (partial credit for 'similar')
weighted_accuracy = (correct + 0.5 * similar) / total

# 3️⃣ F1 Score approximation
TP = correct + 0.5 * similar
FN = 0.5 * similar + incorrect
# Assuming FP = 0 (as Simple Transformers counts predictions, not negatives)
precision = TP / TP
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)

# Print results
print(f"Exact Match (EM): {exact_match:.4f} → {exact_match*100:.2f}%")
print(f"Weighted Accuracy: {weighted_accuracy:.4f} → {weighted_accuracy*100:.2f}%")
print(f"F1 Score: {f1_score:.4f} → {f1_score*100:.2f}%")

Exact Match (EM): 0.4798 → 47.98%
Weighted Accuracy: 0.6910 → 69.10%
F1 Score: 0.8173 → 81.73%


In [21]:
correct = result['correct']
similar = result['similar']
incorrect = result['incorrect']
total = correct + similar + incorrect

# 1️⃣ Exact Match Accuracy
exact_match = correct / total

# 2️⃣ Weighted Accuracy (partial credit for 'similar')
weighted_accuracy = (correct + 0.5 * similar) / total

# 3️⃣ F1 Score approximation
TP = correct + 0.5 * similar
FN = 0.5 * similar + incorrect
# Assuming FP = 0 (as Simple Transformers counts predictions, not negatives)
precision = TP / TP
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)

# Print results
print(f"Exact Match (EM): {exact_match:.4f} → {exact_match*100:.2f}%")
print(f"Weighted Accuracy: {weighted_accuracy:.4f} → {weighted_accuracy*100:.2f}%")
print(f"F1 Score: {f1_score:.4f} → {f1_score*100:.2f}%")

Exact Match (EM): 0.6843 → 68.43%
Weighted Accuracy: 0.8161 → 81.61%
F1 Score: 0.8987 → 89.87%


## Paso 9: Guardar los resultados

In [22]:
# Folder to save
local_path = "./QA_model_distill-bert"
os.makedirs(local_path, exist_ok=True)

# Save the Hugging Face model & tokenizer directly
model.model.save_pretrained(local_path)       # Saves weights + config
model.tokenizer.save_pretrained(local_path)   # Saves vocab + tokenizer config

# Check files
!ls -l ./QA_model

ls: cannot access './QA_model': No such file or directory


In [24]:
shutil.make_archive("QA_model_distill-bert", 'zip', local_path)
print("✅ Zipped model")
!ls -lh QA_model.zip

✅ Zipped model
ls: cannot access 'QA_model.zip': No such file or directory


In [25]:
files.download("QA_model_distill-bert.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [26]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [27]:
from huggingface_hub import login

# This will open a prompt for your Hugging Face token
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [28]:
from huggingface_hub import whoami
print(whoami())

{'type': 'user', 'id': '6682cc422b6af3f60a185123', 'name': 'BlueAutomata', 'fullname': 'Guillermo Luigui Ubaldo Nieto Angarita', 'email': 'guillermo.luigui.nieto@gmail.com', 'emailVerified': True, 'canPay': False, 'periodEnd': None, 'isPro': False, 'avatarUrl': '/avatars/b6cf26e7fac6e034fc5f2b2b87f9ff70.svg', 'orgs': [], 'auth': {'type': 'access_token', 'accessToken': {'displayName': 'write_token', 'role': 'write', 'createdAt': '2025-10-26T19:35:35.613Z'}}}


In [29]:

from huggingface_hub import login, create_repo
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# 1️⃣ Define your paths and repo name
model_dir = "./outputs"
repo_id = "BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico"

# 2️⃣ Create the repo (won’t fail if it already exists)
create_repo(repo_id, private=False, exist_ok=True)

# 3️⃣ Load your SimpleTransformers model as a standard HF model
hf_model = AutoModelForQuestionAnswering.from_pretrained(model_dir)
hf_tokenizer = AutoTokenizer.from_pretrained(model_dir)

# 4️⃣ Push to the Hugging Face Hub
hf_model.push_to_hub(
    repo_id,
    description="DistilBERT-base Spanish WWM cased model fine-tuned for extractive QA on news articles from Colombia and Mexico.",
    tags=[
        "spanish",
        "question-answering",
        "extractive-qa",
        "distilbert",
        "bert",
        "wwm-cased",
        "colombia",
        "mexico",
        "news-dataset",
        "fine-tuned-model"
    ]
)

hf_tokenizer.push_to_hub(repo_id)

The following layers were not sharded: bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.query.weight, bert.encoder.layer.*.attention.self.query.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.value.bias, bert.embeddings.token_type_embeddings.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, qa_outputs.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.weight, bert.enco

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...9tdbiv6/model.safetensors:   0%|          |  558kB /  437MB            

README.md: 0.00B [00:00, ?B/s]

CommitInfo(commit_url='https://huggingface.co/BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico/commit/a8a26f93f1a053a905e45be05548ad6aaed574f8', commit_message='Upload tokenizer', commit_description='', oid='a8a26f93f1a053a905e45be05548ad6aaed574f8', pr_url=None, repo_url=RepoUrl('https://huggingface.co/BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico', endpoint='https://huggingface.co', repo_type='model', repo_id='BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico'), pr_revision=None, pr_num=None)

In [30]:

shutil.move("QA_model_distill-bert.zip", "/content/drive/MyDrive/Thesis_QA_Optimization/Model")

'/content/drive/MyDrive/Thesis_QA_Optimization/Model/QA_model_distill-bert.zip'

In [31]:
# Path to the folder containing the saved model
model_path = "./QA_model_distill-bert"  # change if different

# Reload the model
my_model = QuestionAnsweringModel(
    "bert",
    model_path,
    use_cuda=True  # set to False if no GPU
)

The following layers were not sharded: bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.query.weight, bert.encoder.layer.*.attention.self.query.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.value.bias, bert.embeddings.token_type_embeddings.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, qa_outputs.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.weight, bert.enco

In [32]:
# Context & question
context = "Ciudad de México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez, fue hallado muerto en Tamaulipas."
question = "¿Quién fue hallado muerto en Tamaulipas?"

# Prepare input in SimpleTransformers format
to_predict = [
    {
        "context": context,
        "qas": [
            {
                "id": "0",
                "question": question,
                "answers": [{"text": " ", "answer_start": 0}],
                "is_impossible": False
            }
        ]
    }
]

# Run prediction
answers = my_model.predict(to_predict)
print(answers)

convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 211.76it/s]
add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 11949.58it/s]


Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

([{'id': '0', 'answer': ['Abraham Jeremías Pérez Ramírez,', 'El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Ramírez', 'capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Jeremías Pérez Ramírez,', 'Abraham', 'Abraham Jeremías Pérez', 'Pérez Ramírez', 'Abraham Jeremías', 'Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado muerto en Tamaulipas.', 'Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue', 'México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', '', 'El capitán de la Secretaría de Marina', 'El capitán de la Secretaría de Marina, Abraham', 'El capitán de la Secretaría de Marina, Abraham Jeremías Pérez']}], [{'id': '0', 'probability': [0.9992364060818621, 0.0002562598541313077, 8.508518425486447e-05, 8.319539392861983e-05, 7.846072921677978e-05, 6.917376500808479e-05, 5.6678

  with amp.autocast():


In [33]:
# Load your fine-tuned model from the Hub
qa = pipeline(
    "question-answering",
    model="BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico",
    tokenizer="BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico"
)

# Provide Spanish context
contexto = """
El presidente Gustavo Petro anunció nuevas medidas para impulsar el uso de energías renovables en Colombia,
especialmente en la región del Caribe, donde los proyectos solares y eólicos han ganado protagonismo.
El objetivo del gobierno es reducir las emisiones de carbono en un 30% para el año 2030.
"""

# Ask questions in Spanish
preguntas = [
    "¿Quién anunció nuevas medidas para energías renovables?",
    "¿En qué región se impulsarán los proyectos solares y eólicos?",
    "¿Cuál es el objetivo del gobierno para 2030?"
]

# Evaluate each question
for pregunta in preguntas:
    respuesta = qa(question=pregunta, context=contexto)
    print(f"❓ {pregunta}\n💬 {respuesta['answer']}\n")

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/437M [00:00<?, ?B/s]

The following layers were not sharded: bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.query.weight, bert.encoder.layer.*.attention.self.query.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.value.bias, bert.embeddings.token_type_embeddings.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, qa_outputs.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.weight, bert.enco

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

Device set to use cuda:0


❓ ¿Quién anunció nuevas medidas para energías renovables?
💬 Gustavo Petro

❓ ¿En qué región se impulsarán los proyectos solares y eólicos?
💬 Caribe

❓ ¿Cuál es el objetivo del gobierno para 2030?
💬 reducir las emisiones de carbono en un 30%



In [34]:
def flatten_squad(dataset):
    # If the dataset is a dict with "data", extract it
    if isinstance(dataset, dict) and "data" in dataset:
        dataset = dataset["data"]

    new_data = []
    for article in dataset:
        for para in article["paragraphs"]:
            new_data.append({
                "title": article.get("title", ""),
                "context": para["context"],
                "qas": para["qas"]
            })
    return new_data

In [35]:
flat_eval = flatten_squad(eval_dataset)

In [36]:
metric = evaluate.load("squad")

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [37]:
from transformers import pipeline
import evaluate

qa_pipeline = pipeline(
    "question-answering",
    model="BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico",
    tokenizer="BlueAutomata/distill-bert-base-spanish-wwm-cased-news-qa-colombia-mexico"
)



The following layers were not sharded: bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.query.weight, bert.encoder.layer.*.attention.self.query.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.value.bias, bert.embeddings.token_type_embeddings.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, qa_outputs.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.weight, bert.enco

In [38]:
from transformers import pipeline
import evaluate

qa_pipeline_original = pipeline(
    "question-answering",
    model="mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",
    tokenizer="mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
)


The following layers were not sharded: bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.query.weight, bert.encoder.layer.*.attention.self.query.bias, qa_outputs.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.position_embeddings.weight, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.value.bias, bert.embeddings.token_type_embeddings.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, qa_outputs.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.weight, bert.enco

In [39]:
predictions_original = []
references_original = []

for ex in flat_eval:
    context = ex["context"]
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            continue  # Skip unanswerable questions

        # Run QA prediction
        pred = qa_pipeline_original(question=qa["question"], context=context)

        # Collect prediction and reference
        predictions_original.append({
            "id": qa["id"],
            "prediction_text": pred["answer"]
        })

        references_original.append({
            "id": qa["id"],
            "answers": {
                "text": [a["text"] for a in qa["answers"]],
                "answer_start": [a["answer_start"] for a in qa["answers"]]
            }
        })

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [40]:
predictions = []
references = []

for ex in flat_eval:
    context = ex["context"]
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            continue  # Skip unanswerable questions

        # Run QA prediction
        pred = qa_pipeline(question=qa["question"], context=context)

        # Collect prediction and reference
        predictions.append({
            "id": qa["id"],
            "prediction_text": pred["answer"]
        })

        references.append({
            "id": qa["id"],
            "answers": {
                "text": [a["text"] for a in qa["answers"]],
                "answer_start": [a["answer_start"] for a in qa["answers"]]
            }
        })


In [41]:
results = metric.compute(predictions=predictions_original, references=references_original)
print("📊 Evaluation results:")
print(f"Exact Match: {results['exact_match']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

📊 Evaluation results:
Exact Match: 56.99
F1 Score: 74.46


In [42]:
results = metric.compute(predictions=predictions, references=references)
print("📊 Evaluation results:")
print(f"Exact Match: {results['exact_match']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

📊 Evaluation results:
Exact Match: 75.12
F1 Score: 84.81


In [43]:
import evaluate
metric = evaluate.load("squad_v2")

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [44]:
from tqdm import tqdm

predictions_original = []
references_original = []

for ex in tqdm(flat_eval):
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            # Questions that have no valid answer in the text
            predictions_original.append({
                "id": qa["id"],
                "prediction_text": "",
                "no_answer_probability": 1.0   # fully confident it's unanswerable
            })
            references_original.append({
                "id": qa["id"],
                "answers": {"text": [], "answer_start": []}
            })
        else:
            # Normal (answerable) questions
            pred = qa_pipeline_original(question=qa["question"], context=ex["context"])

            predictions_original.append({
                "id": qa["id"],
                "prediction_text": pred["answer"],
                # Use model confidence inversely as no-answer probability
                "no_answer_probability": 1.0 - pred.get("score", 0.0)
            })

            references_original.append({
                "id": qa["id"],
                "answers": {
                    "text": [a["text"] for a in qa["answers"]],
                    "answer_start": [a["answer_start"] for a in qa["answers"]]
                }
            })

100%|██████████| 2845/2845 [02:56<00:00, 16.12it/s]


In [45]:
from tqdm import tqdm

predictions = []
references = []

for ex in tqdm(flat_eval):
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            # Questions that have no valid answer in the text
            predictions.append({
                "id": qa["id"],
                "prediction_text": "",
                "no_answer_probability": 1.0   # fully confident it's unanswerable
            })
            references.append({
                "id": qa["id"],
                "answers": {"text": [], "answer_start": []}
            })
        else:
            # Normal (answerable) questions
            pred = qa_pipeline(question=qa["question"], context=ex["context"])

            predictions.append({
                "id": qa["id"],
                "prediction_text": pred["answer"],
                # Use model confidence inversely as no-answer probability
                "no_answer_probability": 1.0 - pred.get("score", 0.0)
            })

            references.append({
                "id": qa["id"],
                "answers": {
                    "text": [a["text"] for a in qa["answers"]],
                    "answer_start": [a["answer_start"] for a in qa["answers"]]
                }
            })

100%|██████████| 2845/2845 [02:43<00:00, 17.39it/s]


In [46]:
predictions_original

[{'id': 'qa-15740',
  'prediction_text': 'capital de Illinois',
  'no_answer_probability': 0.850683030905202},
 {'id': 'qa-15744',
  'prediction_text': 'festival de jaraneros',
  'no_answer_probability': 0.3174792132922448},
 {'id': 'qa-15745', 'prediction_text': '', 'no_answer_probability': 1.0},
 {'id': 'qa-15746', 'prediction_text': '', 'no_answer_probability': 1.0},
 {'id': 'qa-15859', 'prediction_text': '', 'no_answer_probability': 1.0},
 {'id': 'qa-15856',
  'prediction_text': 'Instituto Nacional de Bellas Artes y Literatura',
  'no_answer_probability': 0.3024258017539978},
 {'id': 'qa-15901',
  'prediction_text': 'investigación participativa y trabajo comunitario',
  'no_answer_probability': 0.6884381473064423},
 {'id': 'qa-15902', 'prediction_text': '', 'no_answer_probability': 1.0},
 {'id': 'qa-15431', 'prediction_text': '', 'no_answer_probability': 1.0},
 {'id': 'qa-15432', 'prediction_text': '', 'no_answer_probability': 1.0},
 {'id': 'qa-15426',
  'prediction_text': 'Lucina 

In [47]:
results = metric.compute(predictions=predictions_original, references=references_original)

print("📊 Evaluation results:")
print(f"Exact Match: {results['exact']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

📊 Evaluation results:
Exact Match: 69.97
F1 Score: 82.17


In [48]:
results = metric.compute(predictions=predictions, references=references)
print("📊 Evaluation results:")
print(f"Exact Match: {results['exact']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

📊 Evaluation results:
Exact Match: 82.63
F1 Score: 89.40


In [49]:
!mv outputs outputs_distill-bert
!zip -r outputs_distill-bert.zip outputs_distill-bert

  adding: outputs_distill-bert/ (stored 0%)
  adding: outputs_distill-bert/null_odds_test.json (deflated 77%)
  adding: outputs_distill-bert/predictions_test.json (deflated 70%)
  adding: outputs_distill-bert/vocab.txt (deflated 54%)
  adding: outputs_distill-bert/config.json (deflated 49%)
  adding: outputs_distill-bert/checkpoint-17396-epoch-2/ (stored 0%)
  adding: outputs_distill-bert/checkpoint-17396-epoch-2/eval_results.txt (deflated 10%)
  adding: outputs_distill-bert/special_tokens_map.json (deflated 42%)
  adding: outputs_distill-bert/training_args.bin (deflated 53%)
  adding: outputs_distill-bert/model.safetensors (deflated 7%)
  adding: outputs_distill-bert/nbest_predictions_test.json (deflated 87%)
  adding: outputs_distill-bert/training_progress_scores.csv (deflated 51%)
  adding: outputs_distill-bert/model_args.json (deflated 60%)
  adding: outputs_distill-bert/checkpoint-8698-epoch-1/ (stored 0%)
  adding: outputs_distill-bert/checkpoint-8698-epoch-1/eval_results.txt (de

In [50]:
shutil.move("outputs_distill-bert.zip", "/content/drive/MyDrive/Thesis_QA_Optimization/Model")

'/content/drive/MyDrive/Thesis_QA_Optimization/Model/outputs_distill-bert.zip'

In [None]:
import os
import shutil

# Define the full path to the destination directory
destination_dir = '/content/drive/MyDrive/Thesis_QA_Optimization/Model'
source_file = 'outputs_bert.zip' # This is the file you want to move

# 1. Check if the directory exists and create it if it doesn't
# The `exist_ok=True` argument prevents an error if the directory already exists.
# The `os.makedirs` function creates all intermediate-level directories needed.
os.makedirs(destination_dir, exist_ok=True)

# 2. Now you can safely move the file
try:
    shutil.move(source_file, destination_dir)
    print(f"Successfully moved {source_file} to {destination_dir}")
except FileNotFoundError as e:
    # This might catch a different FileNotFoundError if the source file doesn't exist,
    # but the primary directory issue should be resolved by os.makedirs.
    print(f"Error moving file: {e}")