# Entrenamiento con Simple Transformers del model BERT

## Paso 1: Cargar datos en Google Colab

## Paso 2: Instalar librerías

In [1]:
!pip install transformers evaluate torch --quiet
!pip install simpletransformers transformers datasets huggingface_hub scikit-learn
!pip install evaluate --quiet



## Paso 3: Cargar librerías

In [2]:
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
import evaluate
import json
import os
import shutil
import requests


from tqdm import tqdm
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs
from sklearn.model_selection import train_test_split
from google.colab import files

In [3]:
import logging
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

In [4]:
import torch
print("GPU available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))

GPU available: True
GPU name: NVIDIA A100-SXM4-80GB


## Paso 4: Cargar datos

In [6]:
# URLs of the files
urls = {
    "eval_colombia_mexico_dataset.json": "https://github.com/BlueAutomata/tesis-optimizacion-de-modelos-de-question-answering/raw/refs/heads/master/src/datasets/exploration_datasets/gold/eval_colombia_mexico_dataset.json",
    "train_colombia_mexico_dataset.json": "https://github.com/BlueAutomata/tesis-optimizacion-de-modelos-de-question-answering/raw/refs/heads/master/src/datasets/exploration_datasets/gold/train_colombia_mexico_dataset.json"
}

# Dictionary to store the loaded JSON data
datasets = {}

for filename, url in urls.items():
    # Download the file
    response = requests.get(url)
    if response.status_code == 200:
        # Save locally
        with open(filename, "wb") as f:
            f.write(response.content)
        # Load JSON into Python
        datasets[filename] = response.json()
        print(f"{filename} downloaded and loaded successfully!")
    else:
        print(f"Failed to download {filename}. Status code: {response.status_code}")

eval_colombia_mexico_dataset.json downloaded and loaded successfully!
train_colombia_mexico_dataset.json downloaded and loaded successfully!


In [7]:
with open("train_colombia_mexico_dataset.json", "r", encoding="utf-8") as f:
    train_dataset = json.load(f)

print(f"✅ Loaded {len(train_dataset)} records successfully!")

✅ Loaded 1 records successfully!


In [8]:
with open("eval_colombia_mexico_dataset.json", "r", encoding="utf-8") as f:
    eval_dataset = json.load(f)

print(f"✅ Loaded {len(eval_dataset)} records successfully!")

✅ Loaded 1 records successfully!


In [9]:
train_dataset = train_dataset["data"]

In [10]:
eval_dataset = eval_dataset["data"]

In [11]:
# 🔧 1️⃣ Flatten your dataset so each row has 'context' and 'qas'
def flatten_squad(dataset):
    new_data = []
    for article in dataset:
        for para in article["paragraphs"]:
            new_data.append({
                "context": para["context"],
                "qas": para["qas"]
            })
    return new_data

In [12]:
train_data = flatten_squad(train_dataset)

In [13]:
eval_data = flatten_squad(eval_dataset)

In [14]:
print(f"✅ Training samples: {len(train_data)}")
print(f"✅ Eval samples: {len(eval_data)}")

✅ Training samples: 3235
✅ Eval samples: 2845


## Paso 5: Definir hiperparámetros

In [20]:
model_args = QuestionAnsweringArgs()

# Training behavior
model_args.train_batch_size = 8
model_args.eval_batch_size = 8
model_args.num_train_epochs = 2
model_args.learning_rate = 5e-6
model_args.gradient_accumulation_steps = 1
model_args.overwrite_output_dir = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_steps = 500
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = False
model_args.save_steps = -1
model_args.best_model_dir = "./outputs/best_model/"
model_args.output_dir = "./outputs/"

# Optimization
model_args.max_seq_length = 384
model_args.doc_stride = 128
model_args.warmup_ratio = 0.1
model_args.max_answer_length = 30

# Logging
model_args.logging_steps = 100
model_args.evaluate_during_training_verbose = True
model_args.manual_seed = 42

# 🔹 Sliding window parameters
model_args.max_seq_length = 384          # maximum total input sequence length after tokenization
model_args.doc_stride = 128              # overlap between two sliding windows
model_args.max_query_length = 64         # maximum length of the question

# Resource handling
model_args.use_multiprocessing = False  # safer for notebooks
model_args.fp16 = torch.cuda.is_available()  # use mixed precision if CUDA available

## Paso 6: Cargar el modelo

In [21]:
model_original = QuestionAnsweringModel(
    model_type="bert",
    model_name="mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",  # BETO
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

The following layers were not sharded: bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.attention.self.value.weight, qa_outputs.weight, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, qa_outputs.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.dense.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.b

In [22]:
model = QuestionAnsweringModel(
    model_type="bert",
    model_name="mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",  # BETO
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

The following layers were not sharded: bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.attention.self.value.weight, qa_outputs.weight, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, qa_outputs.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.dense.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.b

## Paso 7: Entrenamiento del modelo

In [23]:
model.train_model(train_data, eval_data=eval_data)

100%|██████████| 14613/14613 [07:25<00:00, 32.78it/s]
add example index and unique id: 100%|██████████| 14613/14613 [00:00<00:00, 646478.82it/s]


Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 2:   0%|          | 0/8698 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<48:28:16, 27.87s/it][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:35, 106.46it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:06, 205.84it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 157.21it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 425918.12it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<43:38:04, 25.09s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:09, 68.66it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:29<00:28, 130.57it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:30<00:06, 285.36it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:06, 197.97it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 152.56it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 416629.81it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<47:38:37, 27.39s/it][A[A

convert squad examples to features:  32%|███▏      | 2001/6263 [00:28<00:43, 97.34it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:34<00:09, 192.26it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:38<00:07, 175.09it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 148.41it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 384009.33it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<47:09:16, 27.11s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:31<02:03, 42.53it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:32<01:07, 70.99it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:33<00:06, 289.19it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:40<00:06, 197.88it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 147.47it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 395902.55it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:22<39:33:16, 22.74s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:47, 48.85it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:07, 70.74it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:30<00:25, 150.03it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:05, 324.00it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:05, 223.63it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 152.93it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 405340.87it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:24<43:23:13, 24.94s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:28<01:07, 70.53it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:30<00:30, 122.29it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:06, 209.76it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 147.87it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 383426.40it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:28<49:07:41, 28.24s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:10, 67.83it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:31, 118.96it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:05, 223.71it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 149.88it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 386557.86it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:28<50:07:38, 28.82s/it][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:29<00:31, 118.64it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:07, 232.99it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:38<00:07, 168.63it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:43<00:00, 145.49it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 360025.85it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<47:25:39, 27.27s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:31<01:14, 63.95it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:07, 237.20it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:33<00:04, 263.46it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 150.74it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 362210.11it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<44:40:07, 25.68s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:11, 66.60it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:31, 118.69it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:06, 273.64it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 216.52it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:40<00:00, 153.70it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 361671.52it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<48:34:25, 27.92s/it][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:35, 106.06it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:38<00:06, 187.96it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 147.49it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 294970.87it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:28<49:08:59, 28.26s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:45, 49.68it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:29<00:06, 284.69it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:06, 183.10it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:45<00:00, 136.90it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 376945.08it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<48:17:52, 27.77s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:48, 48.54it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:01, 78.05it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:30<00:23, 161.11it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:04, 263.17it/s][A[A

convert squad examples to features:  88%|████████▊ | 5501/6263 [00:39<00:03, 248.70it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 152.57it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 376000.89it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:28<49:36:55, 28.52s/it][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:29<00:31, 120.01it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:33<00:08, 205.66it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:07, 180.07it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 149.35it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 375742.73it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:28<49:11:07, 28.28s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:05, 72.37it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:32, 116.04it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 241.86it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 146.98it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 376793.69it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:23<41:42:31, 23.98s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:30<02:01, 43.22it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:31<01:10, 68.00it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:33<00:28, 132.13it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 147.71it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 355908.93it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<48:28:21, 27.87s/it][A[A

convert squad examples to features:  32%|███▏      | 2001/6263 [00:28<00:42, 100.61it/s][A[A

convert squad examples to features:  40%|████      | 2529/6263 [00:32<00:34, 108.28it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:35<00:05, 235.82it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 151.73it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 355398.52it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/6263 [00:26<45:41:47, 26.27s/it][A
convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:40, 52.15it/s][A
convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:05, 72.41it/s][A
convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:06, 269.08it/s][A
convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 238.72it/s][A
convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 146.21it/s]

add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 358546.73it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

Running Epoch 2 of 2:   0%|          | 0/8698 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:20<34:50:48, 20.03s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:26<01:50, 47.75it/s][A[A

convert squad examples to features:  32%|███▏      | 2001/6263 [00:28<00:40, 106.34it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:34, 109.07it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:33<00:06, 269.19it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:38<00:06, 208.62it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 157.56it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 369355.41it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<43:58:34, 25.28s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:28<01:05, 72.20it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:34, 108.54it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:39<00:06, 198.00it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:43<00:00, 144.66it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 397935.65it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<43:35:06, 25.06s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:45, 49.74it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:31, 121.18it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:06, 260.65it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:34<00:05, 252.39it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:45<00:00, 137.12it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 363588.78it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:26<46:06:29, 26.51s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:27<01:00, 78.66it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:32, 114.91it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:07, 248.60it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 211.48it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 151.98it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 382404.95it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<44:46:04, 25.74s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:44, 50.28it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:31, 118.25it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:06, 258.69it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:05, 216.72it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 147.49it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 383085.31it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:23<40:55:57, 23.53s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:50, 47.65it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:29<01:04, 73.56it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:26, 139.78it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:39<00:05, 224.79it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 148.39it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 341825.22it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:21<37:45:30, 21.71s/it][A[A

convert squad examples to features:   8%|▊         | 501/6263 [00:22<03:00, 31.98it/s] [A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:29<01:51, 47.10it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:02, 76.52it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:33<00:05, 299.96it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:04, 268.58it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 150.21it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 381090.16it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:29<51:24:01, 29.55s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:32<01:14, 63.55it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:34<00:34, 108.39it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:39<00:05, 224.07it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:45<00:00, 138.41it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 385056.30it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<45:06:33, 25.93s/it][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:26<00:59, 79.64it/s][A[A

convert squad examples to features:  32%|███▏      | 2001/6263 [00:27<00:38, 109.58it/s][A[A

convert squad examples to features:  48%|████▊     | 3001/6263 [00:28<00:17, 188.86it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:05, 293.76it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:38<00:06, 181.61it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:43<00:00, 144.70it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 359725.11it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:28<49:35:14, 28.51s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:29<01:51, 47.00it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:01, 77.42it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:38<00:05, 225.03it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 146.37it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 375388.35it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:19<34:17:10, 19.71s/it][A[A

convert squad examples to features:   8%|▊         | 501/6263 [00:21<03:03, 31.42it/s] [A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:41, 51.97it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:31<01:07, 70.16it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:33<00:28, 132.04it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:04, 279.98it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 148.61it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 369308.67it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:30<52:26:33, 30.15s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:30<01:53, 46.32it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:34<00:32, 115.61it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:39<00:05, 216.12it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:44<00:00, 140.21it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 367443.82it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:24<43:26:12, 24.97s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:51, 47.13it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:32<01:13, 64.67it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:35<00:04, 271.28it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 151.37it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 356454.66it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:25<44:30:07, 25.58s/it][A[A

convert squad examples to features:  32%|███▏      | 2001/6263 [00:29<00:47, 90.06it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:34, 108.08it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:07, 249.65it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:05, 223.30it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 150.23it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 336324.05it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:27<48:11:10, 27.70s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:28<01:46, 49.56it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:04, 73.48it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:06, 293.30it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:34<00:04, 294.18it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:42<00:00, 146.42it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 405278.34it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:19<33:51:26, 19.46s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:29<02:02, 43.13it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:10, 67.84it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:31<00:28, 133.12it/s][A[A

convert squad examples to features:  72%|███████▏  | 4501/6263 [00:32<00:05, 324.02it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:36<00:04, 257.83it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:38<00:00, 161.09it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 376227.06it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/6263 [00:26<45:51:13, 26.36s/it][A[A

convert squad examples to features:  16%|█▌        | 1001/6263 [00:27<01:40, 52.39it/s][A[A

convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:08, 69.81it/s][A[A

convert squad examples to features:  40%|███▉      | 2501/6263 [00:32<00:28, 131.41it/s][A[A

convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:04, 253.21it/s][A[A

convert squad examples to features: 100%|██████████| 6263/6263 [00:39<00:00, 156.85it/s]


add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 358136.12it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/6263 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/6263 [00:26<46:45:12, 26.88s/it][A
convert squad examples to features:  24%|██▍       | 1501/6263 [00:30<01:10, 67.26it/s][A
convert squad examples to features:  72%|███████▏  | 4501/6263 [00:31<00:07, 239.27it/s][A
convert squad examples to features:  80%|███████▉  | 5001/6263 [00:37<00:06, 183.78it/s][A
convert squad examples to features: 100%|██████████| 6263/6263 [00:41<00:00, 152.57it/s]

add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 373924.24it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

(17396,
 {'global_step': [500,
   1000,
   1500,
   2000,
   2500,
   3000,
   3500,
   4000,
   4500,
   5000,
   5500,
   6000,
   6500,
   7000,
   7500,
   8000,
   8500,
   8698,
   9000,
   9500,
   10000,
   10500,
   11000,
   11500,
   12000,
   12500,
   13000,
   13500,
   14000,
   14500,
   15000,
   15500,
   16000,
   16500,
   17000,
   17396],
  'correct': [3315,
   3640,
   3861,
   3984,
   4072,
   4042,
   4126,
   4125,
   4129,
   4150,
   4180,
   4217,
   4185,
   4232,
   4220,
   4263,
   4238,
   4253,
   4278,
   4275,
   4261,
   4257,
   4228,
   4263,
   4263,
   4284,
   4279,
   4292,
   4288,
   4279,
   4291,
   4286,
   4289,
   4294,
   4295,
   4293],
  'similar': [2656,
   2303,
   2098,
   1890,
   1868,
   1934,
   1804,
   1815,
   1777,
   1795,
   1781,
   1700,
   1805,
   1719,
   1717,
   1691,
   1722,
   1710,
   1673,
   1690,
   1691,
   1700,
   1691,
   1698,
   1679,
   1670,
   1682,
   1665,
   1672,
   1689,
   1669,
   1674,
  

## Paso 8: Evaluación de los resultados

In [24]:
result_original, texts_original = model_original.eval_model(eval_data)
print("📊 Evaluation results:")
print(result_original)

convert squad examples to features: 100%|██████████| 6263/6263 [00:46<00:00, 133.81it/s]
add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 364638.55it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

📊 Evaluation results:
{'correct': 3075, 'similar': 2614, 'incorrect': 574, 'eval_loss': -6.884965852649007}


In [25]:
result, texts = model.eval_model(eval_data)
print("📊 Evaluation results:")
print(result)

convert squad examples to features: 100%|██████████| 6263/6263 [00:48<00:00, 130.03it/s]
add example index and unique id: 100%|██████████| 6263/6263 [00:00<00:00, 379910.71it/s]


Running Evaluation:   0%|          | 0/3775 [00:00<?, ?it/s]

📊 Evaluation results:
{'correct': 4293, 'similar': 1664, 'incorrect': 306, 'eval_loss': -9.040801945364239}


In [27]:
correct = result_original['correct']
similar = result_original['similar']
incorrect = result_original['incorrect']
total = correct + similar + incorrect

# 1️⃣ Exact Match Accuracy
exact_match = correct / total

# 2️⃣ Weighted Accuracy (partial credit for 'similar')
weighted_accuracy = (correct + 0.5 * similar) / total

# 3️⃣ F1 Score approximation
TP = correct + 0.5 * similar
FN = 0.5 * similar + incorrect
# Assuming FP = 0 (as Simple Transformers counts predictions, not negatives)
precision = TP / TP
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)

# Print results
print(f"Exact Match (EM): {exact_match:.4f} → {exact_match*100:.2f}%")
print(f"Weighted Accuracy: {weighted_accuracy:.4f} → {weighted_accuracy*100:.2f}%")
print(f"F1 Score: {f1_score:.4f} → {f1_score*100:.2f}%")

Exact Match (EM): 0.4910 → 49.10%
Weighted Accuracy: 0.6997 → 69.97%
F1 Score: 0.8233 → 82.33%


In [26]:
correct = result['correct']
similar = result['similar']
incorrect = result['incorrect']
total = correct + similar + incorrect

# 1️⃣ Exact Match Accuracy
exact_match = correct / total

# 2️⃣ Weighted Accuracy (partial credit for 'similar')
weighted_accuracy = (correct + 0.5 * similar) / total

# 3️⃣ F1 Score approximation
TP = correct + 0.5 * similar
FN = 0.5 * similar + incorrect
# Assuming FP = 0 (as Simple Transformers counts predictions, not negatives)
precision = TP / TP
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)

# Print results
print(f"Exact Match (EM): {exact_match:.4f} → {exact_match*100:.2f}%")
print(f"Weighted Accuracy: {weighted_accuracy:.4f} → {weighted_accuracy*100:.2f}%")
print(f"F1 Score: {f1_score:.4f} → {f1_score*100:.2f}%")

Exact Match (EM): 0.6855 → 68.55%
Weighted Accuracy: 0.8183 → 81.83%
F1 Score: 0.9001 → 90.01%


## Paso 9: Guardar los resultados

In [29]:
# Folder to save
local_path = "./QA_model_bert"
os.makedirs(local_path, exist_ok=True)

# Save the Hugging Face model & tokenizer directly
model.model.save_pretrained(local_path)       # Saves weights + config
model.tokenizer.save_pretrained(local_path)   # Saves vocab + tokenizer config

# Check files
!ls -l ./QA_model

ls: cannot access './QA_model': No such file or directory


In [30]:
shutil.make_archive("QA_model_bert", 'zip', local_path)
print("✅ Zipped model")
!ls -lh QA_model.zip

✅ Zipped model
ls: cannot access 'QA_model.zip': No such file or directory


In [31]:
files.download("QA_model_bert.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [82]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [36]:
from huggingface_hub import login

# This will open a prompt for your Hugging Face token
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [39]:
from huggingface_hub import whoami
print(whoami())


{'type': 'user', 'id': '6682cc422b6af3f60a185123', 'name': 'BlueAutomata', 'fullname': 'Guillermo Luigui Ubaldo Nieto Angarita', 'email': 'guillermo.luigui.nieto@gmail.com', 'emailVerified': True, 'canPay': False, 'periodEnd': None, 'isPro': False, 'avatarUrl': '/avatars/b6cf26e7fac6e034fc5f2b2b87f9ff70.svg', 'orgs': [], 'auth': {'type': 'access_token', 'accessToken': {'displayName': 'write_token', 'role': 'write', 'createdAt': '2025-10-26T19:35:35.613Z'}}}


In [41]:

from huggingface_hub import login, create_repo
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# 1️⃣ Define your paths and repo name
model_dir = "./outputs"
repo_id = "BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico"

# 2️⃣ Create the repo (won’t fail if it already exists)
create_repo(repo_id, private=False, exist_ok=True)

# 3️⃣ Load your SimpleTransformers model as a standard HF model
hf_model = AutoModelForQuestionAnswering.from_pretrained(model_dir)
hf_tokenizer = AutoTokenizer.from_pretrained(model_dir)

# 4️⃣ Push to the Hugging Face Hub
hf_model.push_to_hub(
    repo_id,
    description="BERT-base Spanish WWM cased model fine-tuned for extractive QA on news articles from Colombia and Mexico.",
    tags=["spanish", "qa", "news", "colombia", "mexico", "bert-base", "wwm", "cased"]
)

hf_tokenizer.push_to_hub(repo_id)

The following layers were not sharded: bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.attention.self.value.weight, qa_outputs.weight, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, qa_outputs.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.dense.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.b

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...0eaw9mg/model.safetensors:   0%|          |  558kB /  437MB            

README.md: 0.00B [00:00, ?B/s]

CommitInfo(commit_url='https://huggingface.co/BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico/commit/1ed334653334e746169668dd1d0d68f0e0d64070', commit_message='Upload tokenizer', commit_description='', oid='1ed334653334e746169668dd1d0d68f0e0d64070', pr_url=None, repo_url=RepoUrl('https://huggingface.co/BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico', endpoint='https://huggingface.co', repo_type='model', repo_id='BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico'), pr_revision=None, pr_num=None)

In [42]:

shutil.move("QA_model_bert.zip", "/content/drive/MyDrive/Tahesis_QA_Optimization/Model")

'/content/drive/MyDrive/Thesis_QA_Optimization/Model/QA_model_bert.zip'

In [43]:
# Path to the folder containing the saved model
model_path = "./QA_model_bert"  # change if different

# Reload the model
my_model = QuestionAnsweringModel(
    "bert",
    model_path,
    use_cuda=True  # set to False if no GPU
)

The following layers were not sharded: bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.attention.self.value.weight, qa_outputs.weight, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, qa_outputs.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.dense.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.b

In [44]:
# Context & question
context = "Ciudad de México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez, fue hallado muerto en Tamaulipas."
question = "¿Quién fue hallado muerto en Tamaulipas?"

# Prepare input in SimpleTransformers format
to_predict = [
    {
        "context": context,
        "qas": [
            {
                "id": "0",
                "question": question,
                "answers": [{"text": " ", "answer_start": 0}],
                "is_impossible": False
            }
        ]
    }
]

# Run prediction
answers = my_model.predict(to_predict)
print(answers)

convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 204.40it/s]
add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 11397.57it/s]


Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

([{'id': '0', 'answer': ['Abraham Jeremías Pérez Ramírez,', 'El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Ramírez', 'capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez', 'Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado muerto en Tamaulipas.', 'Abraham', 'Ciudad de México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Pérez Ramírez', 'Abraham Jeremías', 'la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado muerto', 'de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue', 'de Marina, Abraham Jeremías Pérez Ramírez,', 'México. El capitán de la Secretaría de Marina, Abraham Jeremías Pérez Ramírez,', 'Abraham Jeremías Pérez Ramírez, fue hallado']}], [{'id': '0', 'probability': 

  with amp.autocast():


In [45]:
# Load your fine-tuned model from the Hub
qa = pipeline(
    "question-answering",
    model="BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico",
    tokenizer="BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico"
)

# Provide Spanish context
contexto = """
El presidente Gustavo Petro anunció nuevas medidas para impulsar el uso de energías renovables en Colombia,
especialmente en la región del Caribe, donde los proyectos solares y eólicos han ganado protagonismo.
El objetivo del gobierno es reducir las emisiones de carbono en un 30% para el año 2030.
"""

# Ask questions in Spanish
preguntas = [
    "¿Quién anunció nuevas medidas para energías renovables?",
    "¿En qué región se impulsarán los proyectos solares y eólicos?",
    "¿Cuál es el objetivo del gobierno para 2030?"
]

# Evaluate each question
for pregunta in preguntas:
    respuesta = qa(question=pregunta, context=contexto)
    print(f"❓ {pregunta}\n💬 {respuesta['answer']}\n")

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/437M [00:00<?, ?B/s]

The following layers were not sharded: bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.attention.self.value.weight, qa_outputs.weight, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, qa_outputs.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.dense.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.b

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

Device set to use cuda:0


❓ ¿Quién anunció nuevas medidas para energías renovables?
💬 Gustavo Petro

❓ ¿En qué región se impulsarán los proyectos solares y eólicos?
💬 Caribe

❓ ¿Cuál es el objetivo del gobierno para 2030?
💬 reducir las emisiones de carbono en un 30%



In [49]:
def flatten_squad(dataset):
    # If the dataset is a dict with "data", extract it
    if isinstance(dataset, dict) and "data" in dataset:
        dataset = dataset["data"]

    new_data = []
    for article in dataset:
        for para in article["paragraphs"]:
            new_data.append({
                "title": article.get("title", ""),
                "context": para["context"],
                "qas": para["qas"]
            })
    return new_data


In [50]:
flat_eval = flatten_squad(eval_dataset)

In [51]:
from transformers import pipeline
import evaluate

qa_pipeline = pipeline(
    "question-answering",
    model="BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico",
    tokenizer="BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico"
)

metric = evaluate.load("squad")

The following layers were not sharded: bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.attention.self.value.weight, qa_outputs.weight, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, qa_outputs.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.dense.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.b

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [57]:
from transformers import pipeline
import evaluate

qa_pipeline_original = pipeline(
    "question-answering",
    model="mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",
    tokenizer="mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
)


The following layers were not sharded: bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.attention.self.value.weight, qa_outputs.weight, bert.encoder.layer.*.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, qa_outputs.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.output.LayerNorm.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.intermediate.dense.weight, bert.encoder.layer.*.output.dense.bias, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.attention.self.query.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.self.query.bias, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.b

In [58]:
predictions_original = []
references_original = []

for ex in flat_eval:
    context = ex["context"]
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            continue  # Skip unanswerable questions

        # Run QA prediction
        pred = qa_pipeline_original(question=qa["question"], context=context)

        # Collect prediction and reference
        predictions_original.append({
            "id": qa["id"],
            "prediction_text": pred["answer"]
        })

        references_original.append({
            "id": qa["id"],
            "answers": {
                "text": [a["text"] for a in qa["answers"]],
                "answer_start": [a["answer_start"] for a in qa["answers"]]
            }
        })


In [55]:
predictions = []
references = []

for ex in flat_eval:
    context = ex["context"]
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            continue  # Skip unanswerable questions

        # Run QA prediction
        pred = qa_pipeline(question=qa["question"], context=context)

        # Collect prediction and reference
        predictions.append({
            "id": qa["id"],
            "prediction_text": pred["answer"]
        })

        references.append({
            "id": qa["id"],
            "answers": {
                "text": [a["text"] for a in qa["answers"]],
                "answer_start": [a["answer_start"] for a in qa["answers"]]
            }
        })


In [59]:
results = metric.compute(predictions=predictions_original, references=references_original)
print("📊 Evaluation results:")
print(f"Exact Match: {results['exact_match']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

📊 Evaluation results:
Exact Match: 60.14
F1 Score: 77.00


In [56]:
results = metric.compute(predictions=predictions, references=references)
print("📊 Evaluation results:")
print(f"Exact Match: {results['exact_match']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

📊 Evaluation results:
Exact Match: 75.21
F1 Score: 85.09


In [60]:
import evaluate
metric = evaluate.load("squad_v2")

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [76]:
from tqdm import tqdm

predictions_original = []
references_original = []

for ex in tqdm(flat_eval):
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            # Questions that have no valid answer in the text
            predictions_original.append({
                "id": qa["id"],
                "prediction_text": "",
                "no_answer_probability": 1.0   # fully confident it's unanswerable
            })
            references_original.append({
                "id": qa["id"],
                "answers": {"text": [], "answer_start": []}
            })
        else:
            # Normal (answerable) questions
            pred = qa_pipeline_original(question=qa["question"], context=ex["context"])

            predictions_original.append({
                "id": qa["id"],
                "prediction_text": pred["answer"],
                # Use model confidence inversely as no-answer probability
                "no_answer_probability": 1.0 - pred.get("score", 0.0)
            })

            references_original.append({
                "id": qa["id"],
                "answers": {
                    "text": [a["text"] for a in qa["answers"]],
                    "answer_start": [a["answer_start"] for a in qa["answers"]]
                }
            })

100%|██████████| 2845/2845 [03:10<00:00, 14.90it/s]


In [77]:
from tqdm import tqdm

predictions = []
references = []

for ex in tqdm(flat_eval):
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            # Questions that have no valid answer in the text
            predictions.append({
                "id": qa["id"],
                "prediction_text": "",
                "no_answer_probability": 1.0   # fully confident it's unanswerable
            })
            references.append({
                "id": qa["id"],
                "answers": {"text": [], "answer_start": []}
            })
        else:
            # Normal (answerable) questions
            pred = qa_pipeline(question=qa["question"], context=ex["context"])

            predictions.append({
                "id": qa["id"],
                "prediction_text": pred["answer"],
                # Use model confidence inversely as no-answer probability
                "no_answer_probability": 1.0 - pred.get("score", 0.0)
            })

            references.append({
                "id": qa["id"],
                "answers": {
                    "text": [a["text"] for a in qa["answers"]],
                    "answer_start": [a["answer_start"] for a in qa["answers"]]
                }
            })

100%|██████████| 2845/2845 [02:58<00:00, 15.96it/s]


In [71]:
predictions_original

[{'id': 'qa-15740', 'prediction_text': 'Illinois'},
 {'id': 'qa-15744', 'prediction_text': 'Salón México'},
 {'id': 'qa-15745', 'prediction_text': ''},
 {'id': 'qa-15746', 'prediction_text': ''},
 {'id': 'qa-15859', 'prediction_text': ''},
 {'id': 'qa-15856',
  'prediction_text': 'Instituto Nacional de Bellas Artes y Literatura'},
 {'id': 'qa-15901',
  'prediction_text': 'investigación participativa y trabajo comunitario'},
 {'id': 'qa-15902', 'prediction_text': ''},
 {'id': 'qa-15431', 'prediction_text': ''},
 {'id': 'qa-15432', 'prediction_text': ''},
 {'id': 'qa-15426', 'prediction_text': 'Lucina Jiménez'},
 {'id': 'qa-15037', 'prediction_text': ''},
 {'id': 'qa-19941', 'prediction_text': 'Claudia Sheinbaum Pardo'},
 {'id': 'qa-19940', 'prediction_text': '9 por ciento'},
 {'id': 'qa-18801', 'prediction_text': ''},
 {'id': 'qa-17754', 'prediction_text': 'Organización de Salud Mundial'},
 {'id': 'qa-17755', 'prediction_text': 'líderes árabes'},
 {'id': 'qa-9297', 'prediction_text': 'G

In [78]:
results = metric.compute(predictions=predictions_original, references=references_original)

print("📊 Evaluation results:")
print(f"Exact Match: {results['exact']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

📊 Evaluation results:
Exact Match: 72.17
F1 Score: 83.94


In [79]:
results = metric.compute(predictions=predictions, references=references)
print("📊 Evaluation results:")
print(f"Exact Match: {results['exact']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

📊 Evaluation results:
Exact Match: 82.69
F1 Score: 89.59


In [85]:
!mv outputs outputs_bert
!zip -r outputs_bert.zip outputs_bert

mv: cannot stat 'outputs': No such file or directory
  adding: outputs_bert/ (stored 0%)
  adding: outputs_bert/config.json (deflated 49%)
  adding: outputs_bert/model.safetensors (deflated 7%)
  adding: outputs_bert/predictions_test.json (deflated 70%)
  adding: outputs_bert/checkpoint-17396-epoch-2/ (stored 0%)
  adding: outputs_bert/checkpoint-17396-epoch-2/eval_results.txt (deflated 10%)
  adding: outputs_bert/model_args.json (deflated 61%)
  adding: outputs_bert/training_progress_scores.csv (deflated 51%)
  adding: outputs_bert/null_odds_test.json (deflated 76%)
  adding: outputs_bert/nbest_predictions_test.json (deflated 87%)
  adding: outputs_bert/special_tokens_map.json (deflated 42%)
  adding: outputs_bert/vocab.txt (deflated 54%)
  adding: outputs_bert/tokenizer_config.json (deflated 74%)
  adding: outputs_bert/best_model/ (stored 0%)
  adding: outputs_bert/best_model/config.json (deflated 49%)
  adding: outputs_bert/best_model/model.safetensors (deflated 7%)
  adding: output

In [87]:
shutil.move("outputs_bert.zip", "/content/drive/MyDrive/Thesis_QA_Optimization/Model")

Error: Destination path '/content/drive/MyDrive/Thesis_QA_Optimization/Model/outputs_bert.zip' already exists

In [84]:
import os
import shutil

# Define the full path to the destination directory
destination_dir = '/content/drive/MyDrive/Thesis_QA_Optimization/Model'
source_file = 'outputs_bert.zip' # This is the file you want to move

# 1. Check if the directory exists and create it if it doesn't
# The `exist_ok=True` argument prevents an error if the directory already exists.
# The `os.makedirs` function creates all intermediate-level directories needed.
os.makedirs(destination_dir, exist_ok=True)

# 2. Now you can safely move the file
try:
    shutil.move(source_file, destination_dir)
    print(f"Successfully moved {source_file} to {destination_dir}")
except FileNotFoundError as e:
    # This might catch a different FileNotFoundError if the source file doesn't exist,
    # but the primary directory issue should be resolved by os.makedirs.
    print(f"Error moving file: {e}")

Successfully moved outputs_bert.zip to /content/drive/MyDrive/Tahesis_QA_Optimization/Model
