# Entrenamiento con Simple Transformers del model BERT

## Paso 1: Cargar datos en Google Colab

## Paso 2: Instalar librer√≠as

In [2]:
!pip install transformers evaluate torch --quiet
!pip install simpletransformers transformers datasets huggingface_hub scikit-learn
!pip install evaluate --quiet



## Paso 3: Cargar librer√≠as

In [3]:
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
import evaluate
import json
import os
import shutil
import requests


from tqdm import tqdm
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs
from sklearn.model_selection import train_test_split
from google.colab import files

In [4]:
import logging
logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)

In [5]:
import torch
print("GPU available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))

GPU available: True
GPU name: NVIDIA A100-SXM4-80GB


## Paso 4: Cargar datos

In [6]:
# URLs of the files
urls = {
    "eval_colombia_mexico_dataset.json": "https://github.com/BlueAutomata/tesis-optimizacion-de-modelos-de-question-answering/raw/refs/heads/master/src/datasets/exploration_datasets/gold/eval_colombia_mexico_dataset.json",
    "train_colombia_mexico_dataset.json": "https://github.com/BlueAutomata/tesis-optimizacion-de-modelos-de-question-answering/raw/refs/heads/master/src/datasets/exploration_datasets/gold/train_colombia_mexico_dataset.json"
}

# Dictionary to store the loaded JSON data
datasets = {}

for filename, url in urls.items():
    # Download the file
    response = requests.get(url)
    if response.status_code == 200:
        # Save locally
        with open(filename, "wb") as f:
            f.write(response.content)
        # Load JSON into Python
        datasets[filename] = response.json()
        print(f"{filename} downloaded and loaded successfully!")
    else:
        print(f"Failed to download {filename}. Status code: {response.status_code}")

eval_colombia_mexico_dataset.json downloaded and loaded successfully!
train_colombia_mexico_dataset.json downloaded and loaded successfully!


In [7]:
with open("train_colombia_mexico_dataset.json", "r", encoding="utf-8") as f:
    train_dataset = json.load(f)

print(f"‚úÖ Loaded {len(train_dataset)} records successfully!")

‚úÖ Loaded 1 records successfully!


In [8]:
with open("eval_colombia_mexico_dataset.json", "r", encoding="utf-8") as f:
    eval_dataset = json.load(f)

print(f"‚úÖ Loaded {len(eval_dataset)} records successfully!")

‚úÖ Loaded 1 records successfully!


In [9]:
train_dataset = train_dataset["data"]

In [10]:
eval_dataset = eval_dataset["data"]

In [11]:
# üîß 1Ô∏è‚É£ Flatten your dataset so each row has 'context' and 'qas'
def flatten_squad(dataset):
    new_data = []
    for article in dataset:
        for para in article["paragraphs"]:
            new_data.append({
                "context": para["context"],
                "qas": para["qas"]
            })
    return new_data

In [12]:
train_data = flatten_squad(train_dataset)

In [13]:
eval_data = flatten_squad(eval_dataset)

In [14]:
print(f"‚úÖ Training samples: {len(train_data)}")
print(f"‚úÖ Eval samples: {len(eval_data)}")

‚úÖ Training samples: 4320
‚úÖ Eval samples: 3846


## Paso 5: Definir hiperpar√°metros

In [15]:
model_args = QuestionAnsweringArgs()

# ========================
# ‚öôÔ∏è Training configuration
# ========================
model_args.train_batch_size = 8
model_args.eval_batch_size = 8
model_args.num_train_epochs = 4                # slightly longer training for better convergence
model_args.learning_rate = 3e-5                # standard for BERT fine-tuning
model_args.gradient_accumulation_steps = 4     # effective batch size = 8 √ó 4 = 32
model_args.warmup_ratio = 0.1
model_args.weight_decay = 0.01                 # regularization to prevent overfitting
model_args.overwrite_output_dir = True

# ========================
# üß† Evaluation & Early Stopping
# ========================
model_args.evaluate_during_training = True
model_args.evaluate_during_training_steps = 500
model_args.evaluate_during_training_verbose = True
model_args.save_model_every_epoch = False
model_args.save_eval_checkpoints = True
model_args.save_best_model = True
model_args.metric_for_best_model = "f1"
model_args.greater_is_better = True
model_args.early_stopping_metric = "eval_loss"
model_args.early_stopping_patience = 2
model_args.early_stopping_consider_epochs = True

# ========================
# üìÑ Sequence & Context Settings
# ========================
model_args.max_seq_length = 384       # total sequence length
model_args.doc_stride = 128           # sliding window overlap
model_args.max_query_length = 64      # question length
model_args.max_answer_length = 30

# ========================
# üß© Output & Logging
# ========================
model_args.output_dir = "./outputs/"
model_args.best_model_dir = "./outputs/best_model/"
model_args.logging_steps = 100
model_args.manual_seed = 42

# ========================
# ‚ö° Resource Handling
# ========================
model_args.use_multiprocessing = False             # safer for notebooks
model_args.fp16 = torch.cuda.is_available()        # enable mixed precision if GPU available
model_args.dataloader_num_workers = 2              # small boost in performance
model_args.save_steps = -1                         # don't save intermediate steps

print("‚úÖ Model arguments configured successfully!")

‚úÖ Model arguments configured successfully!


## Paso 6: Cargar el modelo

In [16]:
model_original = QuestionAnsweringModel(
    model_type="bert",
    model_name="mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",  # BETO
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

The following layers were not sharded: bert.encoder.layer.*.intermediate.dense.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, qa_outputs.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.self.query.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.query.weight, qa_outputs.bias, ber

tokenizer_config.json:   0%|          | 0.00/135 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [17]:
model = QuestionAnsweringModel(
    model_type="bert",
    model_name="mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",  # BETO
    args=model_args,
    use_cuda=torch.cuda.is_available()
)

The following layers were not sharded: bert.encoder.layer.*.intermediate.dense.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, qa_outputs.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.self.query.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.query.weight, qa_outputs.bias, ber

## Paso 7: Entrenamiento del modelo

In [18]:
model.train_model(train_data, eval_data=eval_data)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 19487/19487 [10:48<00:00, 30.05it/s]
add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 19487/19487 [00:00<00:00, 633565.64it/s]


Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 4:   0%|          | 0/13019 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:26<62:15:48, 26.84s/it][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:35<01:27, 72.55it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:38<00:36, 131.75it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:41<00:13, 212.73it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:48<00:13, 170.27it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:56<00:00, 146.56it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 413919.40it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

  with amp.autocast():


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:28<66:53:24, 28.84s/it][A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:30<01:37, 70.24it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:33<01:15, 83.74it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:39<00:35, 135.13it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:44<00:17, 187.26it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:46<00:10, 229.27it/s][A[A

convert squad examples to features:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 6501/8352 [00:50<00:09, 200.11it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:51<00:00, 161.33it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:28<65:11:50, 28.11s/it][A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:29<01:35, 72.05it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:33<01:15, 84.12it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:36<00:30, 158.41it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:43<00:18, 180.04it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:45<00:14, 191.49it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:45<00:10, 231.43it/s][A[A

convert squad examples to features:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 6501/8352 [00:47<00:07, 244.37it/s][A[A

convert squad examples to features:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:30<70:26:21, 30.37s/it][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:33<01:19, 80.38it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:38<00:16, 201.35it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:46<00:18, 154.15it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:55<00:00, 150.32it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 386623.85it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:26<60:33:10, 26.10s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:29<05:36, 23.30it/s] [A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:32<01:02, 101.44it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:40<00:16, 203.66it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:47<00:17, 162.71it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:50<00:14, 162.03it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:51<00:00, 160.99it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 395145.42it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:21<49:23:20, 21.29s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:24<04:45, 27.47it/s] [A[A

convert squad examples to features:  12%|‚ñà‚ñè        | 1001/8352 [00:25<01:56, 63.00it/s][A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:30<01:28, 77.19it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:33<01:08, 92.62it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:40<00:32, 148.71it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:41<00:13, 248.69it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:42<00:10, 281.53it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:49<00:13, 169.98

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/8352 [00:24<57:38:52, 24.85s/it][A
convert squad examples to features:   6%|‚ñå         | 501/8352 [00:28<05:31, 23.67it/s] [A
convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:30<00:57, 111.24it/s][A
convert squad examples to features:  36%|‚ñà‚ñà‚ñà‚ñå      | 3001/8352 [00:32<00:32, 164.68it/s][A
convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:36<00:30, 157.55it/s][A
convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:36<00:11, 301.77it/s][A
convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:44<00:16, 177.27it/s][A
convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:46<00:12, 183.68it/s][A
convert squad examples to features:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 6501/8352 [00:47<00:08, 231.26it/s][A
convert s

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

Running Epoch 2 of 4:   0%|          | 0/13019 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:30<70:17:22, 30.30s/it][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:30<01:09, 91.44it/s][A[A

convert squad examples to features:  36%|‚ñà‚ñà‚ñà‚ñå      | 3001/8352 [00:31<00:35, 151.03it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:35<00:33, 146.78it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:36<00:13, 251.05it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:46<00:19, 142.80it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:59<00:19, 142.80it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:59<00:25, 92.23it/s] [A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:29<69:13:48, 29.84s/it][A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:30<01:38, 69.61it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:35<01:22, 76.83it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:38<00:13, 243.56it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:42<00:13, 203.93it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:49<00:14, 159.80it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:53<00:00, 154.79it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 392995.43it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:27<62:45:37, 27.06s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:29<05:37, 23.26it/s] [A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:31<01:23, 81.94it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:31<00:53, 118.51it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:36<00:26, 183.54it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:44<00:18, 184.50it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:45<00:13, 218.42it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:45<00:08, 261.40it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:56

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:22<53:08:19, 22.91s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:27<05:21, 24.41it/s] [A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:38<01:26, 73.77it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:45<00:19, 169.12it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:47<00:11, 211.93it/s][A[A

convert squad examples to features:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 7501/8352 [00:54<00:04, 210.06it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:55<00:00, 149.90it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 392379.16it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:24<57:03:54, 24.60s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:27<05:18, 24.69it/s] [A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:28<01:16, 89.80it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:35<01:19, 80.28it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:39<00:31, 153.05it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:42<00:14, 233.71it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:44<00:12, 227.50it/s][A[A

convert squad examples to features:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 6501/8352 [00:46<00:06, 291.48it/s][A[A

convert squad examples to features:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 7001/8352 [00:4

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:22<51:12:30, 22.08s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:28<05:43, 22.87it/s] [A[A

convert squad examples to features:  12%|‚ñà‚ñè        | 1001/8352 [00:29<02:19, 52.87it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:35<01:10, 90.03it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:40<00:31, 155.27it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:41<00:12, 262.00it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:45<00:13, 213.89it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:51<00:14, 160.27it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:27<64:06:03, 27.63s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:28<05:17, 24.71it/s] [A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:33<01:06, 95.99it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:40<00:16, 207.61it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:43<00:14, 203.17it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:47<00:12, 187.12it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:56<00:00, 147.88it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 379113.30it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/8352 [00:21<49:51:16, 21.49s/it][A
convert squad examples to features:   6%|‚ñå         | 501/8352 [00:22<04:09, 31.46it/s] [A
convert squad examples to features:  12%|‚ñà‚ñè        | 1001/8352 [00:27<02:23, 51.22it/s][A
convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:30<00:57, 111.13it/s][A
convert squad examples to features:  36%|‚ñà‚ñà‚ñà‚ñå      | 3001/8352 [00:33<00:32, 165.52it/s][A
convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:36<00:29, 166.69it/s][A
convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:40<00:14, 233.73it/s][A
convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:43<00:13, 206.03it/s][A
convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:47<00:12, 191.70it/s][A
convert squad examples

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

Running Epoch 3 of 4:   0%|          | 0/13019 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[AException ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x790fedd02ca0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x790fedd02ca0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  Fi

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:28<65:27:41, 28.22s/it][A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:30<01:38, 69.46it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:36<01:29, 71.24it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:37<00:12, 261.48it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:44<00:15, 180.36it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:47<00:12, 185.96it/s][A[A

convert squad examples to features:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 6501/8352 [00:48<00:08, 208.44it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:55<00:00, 149.67it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:28<64:57:30, 28.00s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:28<05:20, 24.50it/s] [A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:34<01:09, 91.44it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:38<00:30, 158.81it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:45<00:18, 177.65it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:49<00:16, 167.90it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:55<00:00, 151.16it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 402667.07it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:29<69:18:04, 29.87s/it][A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:30<01:39, 68.54it/s][A[A

convert squad examples to features:  36%|‚ñà‚ñà‚ñà‚ñå      | 3001/8352 [00:32<00:35, 152.68it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:39<00:16, 203.85it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:45<00:17, 166.29it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:47<00:13, 176.03it/s][A[A

convert squad examples to features:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 6501/8352 [00:49<00:09, 189.17it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:56<00:00, 147.71it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:27<62:45:44, 27.06s/it][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:35<01:27, 72.88it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:38<00:37, 129.14it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:44<00:15, 185.72it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:48<00:13, 174.06it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:53<00:00, 156.51it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 398326.53it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:28<65:47:02, 28.36s/it][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:35<01:26, 73.47it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:38<00:36, 133.42it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:43<00:14, 203.17it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:47<00:13, 179.58it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:54<00:00, 153.56it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 382303.23it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/8352 [00:21<49:11:50, 21.21s/it][A
convert squad examples to features:   6%|‚ñå         | 501/8352 [00:27<05:32, 23.64it/s] [A
convert squad examples to features:  12%|‚ñà‚ñè        | 1001/8352 [00:28<02:14, 54.82it/s][A
convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:31<01:27, 78.01it/s][A
convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:37<01:18, 80.75it/s][A
convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:43<00:34, 142.24it/s][A
convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:44<00:14, 235.85it/s][A
convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:51<00:11, 200.54it/s][A
convert squad examples to features:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 7501/8352 [00:55<00:03, 242.89it/s][A
convert squad examples t

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

Running Epoch 4 of 4:   0%|          | 0/13019 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:28<66:41:14, 28.75s/it][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:45<01:56, 54.63it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:45<00:19, 169.02it/s][A[A

convert squad examples to features:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 5749/8352 [00:46<00:12, 205.18it/s][A[A

convert squad examples to features:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 6159/8352 [00:47<00:10, 207.22it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:57<00:00, 146.18it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 352841.67it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:24<56:23:19, 24.31s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:27<05:22, 24.33it/s] [A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:30<01:24, 80.72it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:35<01:15, 84.14it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:42<00:15, 216.89it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:43<00:12, 236.21it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:46<00:11, 209.07it/s][A[A

convert squad examples to features:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 6501/8352 [00:49<00:09, 204.65it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/83

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:23<54:38:41, 23.56s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:28<05:34, 23.48it/s] [A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:31<01:32, 74.11it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:35<01:14, 85.00it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:42<00:35, 135.63it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:44<00:11, 244.68it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:49<00:11, 200.76it/s][A[A

convert squad examples to features:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 6501/8352 [00:50<00:08, 230.58it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:22<53:03:05, 22.87s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:28<05:38, 23.17it/s] [A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:33<01:09, 91.61it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:36<00:29, 161.74it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:40<00:15, 211.15it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:48<00:18, 150.90it/s][A[A

convert squad examples to features:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 7501/8352 [00:52<00:03, 232.40it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:53<00:00, 154.77it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 83

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:27<63:45:05, 27.48s/it][A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:31<01:44, 65.63it/s][A[A

convert squad examples to features:  36%|‚ñà‚ñà‚ñà‚ñå      | 3001/8352 [00:32<00:36, 148.21it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:36<00:34, 140.37it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:44<00:21, 157.72it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:46<00:16, 170.42it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:51<00:15, 154.69it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:52<00:00, 159.02it/s]


add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:20<46:51:35, 20.20s/it][A[A

convert squad examples to features:   6%|‚ñå         | 501/8352 [00:23<04:27, 29.34it/s] [A[A

convert squad examples to features:  12%|‚ñà‚ñè        | 1001/8352 [00:29<02:40, 45.87it/s][A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:30<01:26, 78.89it/s][A[A

convert squad examples to features:  36%|‚ñà‚ñà‚ñà‚ñå      | 3001/8352 [00:32<00:26, 200.84it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:33<00:22, 219.77it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:40<00:14, 228.98it/s][A[A

convert squad examples to features:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 5501/8352 [00:45<00:16, 172.69it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:47<00:12, 189

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]



convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A[A

convert squad examples to features:   0%|          | 1/8352 [00:25<59:23:34, 25.60s/it][A[A

convert squad examples to features:  18%|‚ñà‚ñä        | 1501/8352 [00:30<01:42, 66.72it/s][A[A

convert squad examples to features:  24%|‚ñà‚ñà‚ñç       | 2001/8352 [00:33<01:20, 79.33it/s][A[A

convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:39<00:36, 134.68it/s][A[A

convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:43<00:17, 188.31it/s][A[A

convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:46<00:10, 221.88it/s][A[A

convert squad examples to features:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 6501/8352 [00:48<00:08, 227.73it/s][A[A

convert squad examples to features:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 7001/8352 [00:50<00:05, 230.00it/s][A[A

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñ

Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]


convert squad examples to features:   0%|          | 0/8352 [00:00<?, ?it/s][A
convert squad examples to features:   0%|          | 1/8352 [00:28<65:34:26, 28.27s/it][A
convert squad examples to features:   6%|‚ñå         | 501/8352 [00:30<05:37, 23.26it/s] [A
convert squad examples to features:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 3501/8352 [00:38<00:34, 140.50it/s][A
convert squad examples to features:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 5001/8352 [00:46<00:20, 160.05it/s][A
convert squad examples to features:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 6001/8352 [00:50<00:13, 175.06it/s][A
convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:51<00:00, 161.86it/s]

add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 391602.78it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

(13016,
 {'global_step': [500,
   1000,
   1500,
   2000,
   2500,
   3000,
   3254,
   3500,
   4000,
   4500,
   5000,
   5500,
   6000,
   6500,
   6508,
   7000,
   7500,
   8000,
   8500,
   9000,
   9500,
   9762,
   10000,
   10500,
   11000,
   11500,
   12000,
   12500,
   13000,
   13016],
  'correct': [5312,
   5453,
   5600,
   5558,
   5602,
   5750,
   5687,
   5684,
   5685,
   5674,
   5774,
   5715,
   5752,
   5722,
   5711,
   5682,
   5731,
   5728,
   5738,
   5726,
   5738,
   5782,
   5776,
   5771,
   5775,
   5773,
   5780,
   5750,
   5765,
   5764],
  'similar': [2606,
   2411,
   2313,
   2342,
   2218,
   2118,
   2267,
   2177,
   2108,
   2166,
   2106,
   2151,
   2186,
   2182,
   2191,
   2170,
   2171,
   2179,
   2122,
   2091,
   2163,
   2133,
   2148,
   2125,
   2111,
   2151,
   2130,
   2143,
   2130,
   2129],
  'incorrect': [434,
   488,
   439,
   452,
   532,
   484,
   398,
   491,
   559,
   512,
   472,
   486,
   414,
   448,
   450,
  

## Paso 8: Evaluaci√≥n de los resultados

In [19]:
result_original, texts_original = model_original.eval_model(eval_data)
print("üìä Evaluation results:")
print(result_original)

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [01:08<00:00, 122.06it/s]
add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 387753.64it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

üìä Evaluation results:
{'correct': 4295, 'similar': 3296, 'incorrect': 761, 'eval_loss': -6.929708698896527}


In [20]:
result, texts = model.eval_model(eval_data)
print("üìä Evaluation results:")
print(result)

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:59<00:00, 140.73it/s]
add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8352/8352 [00:00<00:00, 398943.47it/s]


Running Evaluation:   0%|          | 0/5528 [00:00<?, ?it/s]

üìä Evaluation results:
{'correct': 5764, 'similar': 2129, 'incorrect': 459, 'eval_loss': -10.854899206313315}


In [21]:
correct = result_original['correct']
similar = result_original['similar']
incorrect = result_original['incorrect']
total = correct + similar + incorrect

# 1Ô∏è‚É£ Exact Match Accuracy
exact_match = correct / total

# 2Ô∏è‚É£ Weighted Accuracy (partial credit for 'similar')
weighted_accuracy = (correct + 0.5 * similar) / total

# 3Ô∏è‚É£ F1 Score approximation
TP = correct + 0.5 * similar
FN = 0.5 * similar + incorrect
# Assuming FP = 0 (as Simple Transformers counts predictions, not negatives)
precision = TP / TP
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)

# Print results
print(f"Exact Match (EM): {exact_match:.4f} ‚Üí {exact_match*100:.2f}%")
print(f"Weighted Accuracy: {weighted_accuracy:.4f} ‚Üí {weighted_accuracy*100:.2f}%")
print(f"F1 Score: {f1_score:.4f} ‚Üí {f1_score*100:.2f}%")

Exact Match (EM): 0.5142 ‚Üí 51.42%
Weighted Accuracy: 0.7116 ‚Üí 71.16%
F1 Score: 0.8315 ‚Üí 83.15%


In [22]:
correct = result['correct']
similar = result['similar']
incorrect = result['incorrect']
total = correct + similar + incorrect

# 1Ô∏è‚É£ Exact Match Accuracy
exact_match = correct / total

# 2Ô∏è‚É£ Weighted Accuracy (partial credit for 'similar')
weighted_accuracy = (correct + 0.5 * similar) / total

# 3Ô∏è‚É£ F1 Score approximation
TP = correct + 0.5 * similar
FN = 0.5 * similar + incorrect
# Assuming FP = 0 (as Simple Transformers counts predictions, not negatives)
precision = TP / TP
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)

# Print results
print(f"Exact Match (EM): {exact_match:.4f} ‚Üí {exact_match*100:.2f}%")
print(f"Weighted Accuracy: {weighted_accuracy:.4f} ‚Üí {weighted_accuracy*100:.2f}%")
print(f"F1 Score: {f1_score:.4f} ‚Üí {f1_score*100:.2f}%")

Exact Match (EM): 0.6901 ‚Üí 69.01%
Weighted Accuracy: 0.8176 ‚Üí 81.76%
F1 Score: 0.8996 ‚Üí 89.96%


## Paso 9: Guardar los resultados

In [23]:
# Folder to save
local_path = "./QA_model_bert"
os.makedirs(local_path, exist_ok=True)

# Save the Hugging Face model & tokenizer directly
model.model.save_pretrained(local_path)       # Saves weights + config
model.tokenizer.save_pretrained(local_path)   # Saves vocab + tokenizer config

# Check files
!ls -l ./QA_model

ls: cannot access './QA_model': No such file or directory


In [24]:
shutil.make_archive("QA_model_bert", 'zip', local_path)
print("‚úÖ Zipped model")
!ls -lh QA_model.zip

‚úÖ Zipped model
ls: cannot access 'QA_model.zip': No such file or directory


In [25]:
files.download("QA_model_bert.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [26]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [27]:
from huggingface_hub import login

# This will open a prompt for your Hugging Face token
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv‚Ä¶

In [28]:
from huggingface_hub import whoami
print(whoami())


{'type': 'user', 'id': '6682cc422b6af3f60a185123', 'name': 'BlueAutomata', 'fullname': 'Guillermo Luigui Ubaldo Nieto Angarita', 'email': 'guillermo.luigui.nieto@gmail.com', 'emailVerified': True, 'canPay': False, 'periodEnd': 1764547199, 'isPro': False, 'avatarUrl': '/avatars/b6cf26e7fac6e034fc5f2b2b87f9ff70.svg', 'orgs': [], 'auth': {'type': 'access_token', 'accessToken': {'displayName': 'write_token', 'role': 'write', 'createdAt': '2025-10-26T19:35:35.613Z'}}}


In [29]:

from huggingface_hub import login, create_repo
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# 1Ô∏è‚É£ Define your paths and repo name
model_dir = "./outputs"
repo_id = "BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico"

# 2Ô∏è‚É£ Create the repo (won‚Äôt fail if it already exists)
create_repo(repo_id, private=False, exist_ok=True)

# 3Ô∏è‚É£ Load your SimpleTransformers model as a standard HF model
hf_model = AutoModelForQuestionAnswering.from_pretrained(model_dir)
hf_tokenizer = AutoTokenizer.from_pretrained(model_dir)

# 4Ô∏è‚É£ Push to the Hugging Face Hub
hf_model.push_to_hub(
    repo_id,
    description="BERT-base Spanish WWM cased model fine-tuned for extractive QA on news articles from Colombia and Mexico.",
    tags=["spanish", "qa", "news", "colombia", "mexico", "bert-base", "wwm", "cased"]
)

hf_tokenizer.push_to_hub(repo_id)

The following layers were not sharded: bert.encoder.layer.*.intermediate.dense.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, qa_outputs.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.self.query.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.query.weight, qa_outputs.bias, ber

README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...f1k4bhk/model.safetensors:   0%|          |  558kB /  437MB            

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico/commit/22004715b32e957bc7bff063963d63943405b381', commit_message='Upload tokenizer', commit_description='', oid='22004715b32e957bc7bff063963d63943405b381', pr_url=None, repo_url=RepoUrl('https://huggingface.co/BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico', endpoint='https://huggingface.co', repo_type='model', repo_id='BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico'), pr_revision=None, pr_num=None)

In [30]:

shutil.move("QA_model_bert.zip", "/content/drive/MyDrive/Tahesis_QA_Optimization/Model")

'/content/drive/MyDrive/Tahesis_QA_Optimization/Model/QA_model_bert.zip'

In [31]:
# Path to the folder containing the saved model
model_path = "./QA_model_bert"  # change if different

# Reload the model
my_model = QuestionAnsweringModel(
    "bert",
    model_path,
    use_cuda=True  # set to False if no GPU
)

The following layers were not sharded: bert.encoder.layer.*.intermediate.dense.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, qa_outputs.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.self.query.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.query.weight, qa_outputs.bias, ber

In [32]:
# Context & question
context = "Ciudad de M√©xico. El capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez, fue hallado muerto en Tamaulipas."
question = "¬øQui√©n fue hallado muerto en Tamaulipas?"

# Prepare input in SimpleTransformers format
to_predict = [
    {
        "context": context,
        "qas": [
            {
                "id": "0",
                "question": question,
                "answers": [{"text": " ", "answer_start": 0}],
                "is_impossible": False
            }
        ]
    }
]

# Run prediction
answers = my_model.predict(to_predict)
print(answers)

convert squad examples to features: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 276.05it/s]
add example index and unique id: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 9619.96it/s]


Running Prediction:   0%|          | 0/1 [00:00<?, ?it/s]

([{'id': '0', 'answer': ['Abraham Jerem√≠as P√©rez Ram√≠rez,', 'Ram√≠rez', 'Abraham', 'Jerem√≠as P√©rez Ram√≠rez,', 'Abraham Jerem√≠as P√©rez', 'P√©rez Ram√≠rez', 'capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'Abraham Jerem√≠as', 'Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'El capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'Abraham Jerem√≠as P√©rez Ram√≠rez, fue hallado muerto en Tamaulipas.', 'Ciudad de M√©xico. El capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'M√©xico. El capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'de M√©xico. El capit√°n de la Secretar√≠a de Marina, Abraham Jerem√≠as P√©rez Ram√≠rez,', 'Abraham Jerem√≠as P√©rez Ram√≠rez, fue hallado muerto', 'Abraham Jerem

  with amp.autocast():


In [33]:
# Load your fine-tuned model from the Hub
qa = pipeline(
    "question-answering",
    model="BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico",
    tokenizer="BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico"
)

# Provide Spanish context
contexto = """
El presidente Gustavo Petro anunci√≥ nuevas medidas para impulsar el uso de energ√≠as renovables en Colombia,
especialmente en la regi√≥n del Caribe, donde los proyectos solares y e√≥licos han ganado protagonismo.
El objetivo del gobierno es reducir las emisiones de carbono en un 30% para el a√±o 2030.
"""

# Ask questions in Spanish
preguntas = [
    "¬øQui√©n anunci√≥ nuevas medidas para energ√≠as renovables?",
    "¬øEn qu√© regi√≥n se impulsar√°n los proyectos solares y e√≥licos?",
    "¬øCu√°l es el objetivo del gobierno para 2030?"
]

# Evaluate each question
for pregunta in preguntas:
    respuesta = qa(question=pregunta, context=contexto)
    print(f"‚ùì {pregunta}\nüí¨ {respuesta['answer']}\n")

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/437M [00:00<?, ?B/s]

The following layers were not sharded: bert.encoder.layer.*.intermediate.dense.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, qa_outputs.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.self.query.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.query.weight, qa_outputs.bias, ber

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

Device set to use cuda:0


‚ùì ¬øQui√©n anunci√≥ nuevas medidas para energ√≠as renovables?
üí¨ Gustavo Petro

‚ùì ¬øEn qu√© regi√≥n se impulsar√°n los proyectos solares y e√≥licos?
üí¨ Caribe

‚ùì ¬øCu√°l es el objetivo del gobierno para 2030?
üí¨ reducir las emisiones de carbono en un 30%



In [34]:
def flatten_squad(dataset):
    # If the dataset is a dict with "data", extract it
    if isinstance(dataset, dict) and "data" in dataset:
        dataset = dataset["data"]

    new_data = []
    for article in dataset:
        for para in article["paragraphs"]:
            new_data.append({
                "title": article.get("title", ""),
                "context": para["context"],
                "qas": para["qas"]
            })
    return new_data


In [35]:
flat_eval = flatten_squad(eval_dataset)

In [36]:
from transformers import pipeline
import evaluate

qa_pipeline = pipeline(
    "question-answering",
    model="BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico",
    tokenizer="BlueAutomata/bert-base-spanish-wwm-cased-news-qa-colombia-mexico"
)

metric = evaluate.load("squad")

The following layers were not sharded: bert.encoder.layer.*.intermediate.dense.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, qa_outputs.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.self.query.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.query.weight, qa_outputs.bias, ber

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [37]:
from transformers import pipeline
import evaluate

qa_pipeline_original = pipeline(
    "question-answering",
    model="mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es",
    tokenizer="mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
)


The following layers were not sharded: bert.encoder.layer.*.intermediate.dense.weight, bert.embeddings.position_embeddings.weight, bert.embeddings.token_type_embeddings.weight, bert.encoder.layer.*.attention.output.LayerNorm.weight, bert.embeddings.word_embeddings.weight, bert.encoder.layer.*.intermediate.dense.bias, qa_outputs.weight, bert.embeddings.LayerNorm.bias, bert.encoder.layer.*.output.dense.bias, bert.encoder.layer.*.attention.self.key.bias, bert.encoder.layer.*.attention.self.query.bias, bert.encoder.layer.*.attention.output.LayerNorm.bias, bert.encoder.layer.*.output.LayerNorm.weight, bert.embeddings.LayerNorm.weight, bert.encoder.layer.*.attention.self.key.weight, bert.encoder.layer.*.output.LayerNorm.bias, bert.encoder.layer.*.attention.output.dense.bias, bert.encoder.layer.*.attention.output.dense.weight, bert.encoder.layer.*.attention.self.value.bias, bert.encoder.layer.*.attention.self.value.weight, bert.encoder.layer.*.attention.self.query.weight, qa_outputs.bias, ber

In [38]:
predictions_original = []
references_original = []

for ex in flat_eval:
    context = ex["context"]
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            continue  # Skip unanswerable questions

        # Run QA prediction
        pred = qa_pipeline_original(question=qa["question"], context=context)

        # Collect prediction and reference
        predictions_original.append({
            "id": qa["id"],
            "prediction_text": pred["answer"]
        })

        references_original.append({
            "id": qa["id"],
            "answers": {
                "text": [a["text"] for a in qa["answers"]],
                "answer_start": [a["answer_start"] for a in qa["answers"]]
            }
        })


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [39]:
predictions = []
references = []

for ex in flat_eval:
    context = ex["context"]
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            continue  # Skip unanswerable questions

        # Run QA prediction
        pred = qa_pipeline(question=qa["question"], context=context)

        # Collect prediction and reference
        predictions.append({
            "id": qa["id"],
            "prediction_text": pred["answer"]
        })

        references.append({
            "id": qa["id"],
            "answers": {
                "text": [a["text"] for a in qa["answers"]],
                "answer_start": [a["answer_start"] for a in qa["answers"]]
            }
        })


In [40]:
results = metric.compute(predictions=predictions_original, references=references_original)
print("üìä Evaluation results:")
print(f"Exact Match: {results['exact_match']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

üìä Evaluation results:
Exact Match: 63.47
F1 Score: 77.85


In [41]:
results = metric.compute(predictions=predictions, references=references)
print("üìä Evaluation results:")
print(f"Exact Match: {results['exact_match']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

üìä Evaluation results:
Exact Match: 75.43
F1 Score: 84.31


In [42]:
import evaluate
metric = evaluate.load("squad_v2")

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

In [43]:
from tqdm import tqdm

predictions_original = []
references_original = []

for ex in tqdm(flat_eval):
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            # Questions that have no valid answer in the text
            predictions_original.append({
                "id": qa["id"],
                "prediction_text": "",
                "no_answer_probability": 1.0   # fully confident it's unanswerable
            })
            references_original.append({
                "id": qa["id"],
                "answers": {"text": [], "answer_start": []}
            })
        else:
            # Normal (answerable) questions
            pred = qa_pipeline_original(question=qa["question"], context=ex["context"])

            predictions_original.append({
                "id": qa["id"],
                "prediction_text": pred["answer"],
                # Use model confidence inversely as no-answer probability
                "no_answer_probability": 1.0 - pred.get("score", 0.0)
            })

            references_original.append({
                "id": qa["id"],
                "answers": {
                    "text": [a["text"] for a in qa["answers"]],
                    "answer_start": [a["answer_start"] for a in qa["answers"]]
                }
            })

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3846/3846 [04:18<00:00, 14.87it/s]


In [44]:
from tqdm import tqdm

predictions = []
references = []

for ex in tqdm(flat_eval):
    for qa in ex["qas"]:
        if qa["is_impossible"]:
            # Questions that have no valid answer in the text
            predictions.append({
                "id": qa["id"],
                "prediction_text": "",
                "no_answer_probability": 1.0   # fully confident it's unanswerable
            })
            references.append({
                "id": qa["id"],
                "answers": {"text": [], "answer_start": []}
            })
        else:
            # Normal (answerable) questions
            pred = qa_pipeline(question=qa["question"], context=ex["context"])

            predictions.append({
                "id": qa["id"],
                "prediction_text": pred["answer"],
                # Use model confidence inversely as no-answer probability
                "no_answer_probability": 1.0 - pred.get("score", 0.0)
            })

            references.append({
                "id": qa["id"],
                "answers": {
                    "text": [a["text"] for a in qa["answers"]],
                    "answer_start": [a["answer_start"] for a in qa["answers"]]
                }
            })

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3846/3846 [04:03<00:00, 15.83it/s]


In [45]:
predictions_original

[{'id': 'qa-20530',
  'prediction_text': 'Nine Rain',
  'no_answer_probability': 0.6255012252368033},
 {'id': 'qa-20528',
  'prediction_text': 'Tuxedomoon',
  'no_answer_probability': 0.09759828766379997},
 {'id': 'qa-27433', 'prediction_text': '', 'no_answer_probability': 1.0},
 {'id': 'qa-27430',
  'prediction_text': 'alertas sanitarias',
  'no_answer_probability': 0.8195620966143906},
 {'id': 'qa-27428',
  'prediction_text': 'difenhidramina',
  'no_answer_probability': 0.6039035145154728},
 {'id': 'qa-21633',
  'prediction_text': 'Tren Maya',
  'no_answer_probability': 0.9427659163629869},
 {'id': 'qa-21634', 'prediction_text': '', 'no_answer_probability': 1.0},
 {'id': 'qa-20670',
  'prediction_text': 'Citlali',
  'no_answer_probability': -0.22176275906087994},
 {'id': 'qa-21307',
  'prediction_text': 'Argentina',
  'no_answer_probability': 0.32920163638300437},
 {'id': 'qa-21250',
  'prediction_text': 'proceso de autodestrucci√≥n marcado por adicciones',
  'no_answer_probability':

In [46]:
results = metric.compute(predictions=predictions_original, references=references_original)

print("üìä Evaluation results:")
print(f"Exact Match: {results['exact']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

üìä Evaluation results:
Exact Match: 74.76
F1 Score: 84.70


In [47]:
results = metric.compute(predictions=predictions, references=references)
print("üìä Evaluation results:")
print(f"Exact Match: {results['exact']:.2f}")
print(f"F1 Score: {results['f1']:.2f}")

üìä Evaluation results:
Exact Match: 83.02
F1 Score: 89.16


In [None]:
!mv outputs outputs_bert
!zip -r outputs_bert.zip outputs_bert

  adding: outputs_bert/ (stored 0%)
  adding: outputs_bert/checkpoint-10500/ (stored 0%)
  adding: outputs_bert/checkpoint-10500/config.json (deflated 49%)
  adding: outputs_bert/checkpoint-10500/optimizer.pt (deflated 10%)
  adding: outputs_bert/checkpoint-10500/model.safetensors (deflated 7%)
  adding: outputs_bert/checkpoint-10500/training_args.bin (deflated 53%)
  adding: outputs_bert/checkpoint-10500/special_tokens_map.json (deflated 42%)
  adding: outputs_bert/checkpoint-10500/vocab.txt (deflated 54%)
  adding: outputs_bert/checkpoint-10500/model_args.json (deflated 61%)
  adding: outputs_bert/checkpoint-10500/scheduler.pt (deflated 61%)
  adding: outputs_bert/checkpoint-10500/tokenizer_config.json (deflated 74%)
  adding: outputs_bert/checkpoint-10500/eval_results.txt (deflated 10%)
  adding: outputs_bert/checkpoint-12000/ (stored 0%)
  adding: outputs_bert/checkpoint-12000/config.json (deflated 49%)
  adding: outputs_bert/checkpoint-12000/optimizer.pt (deflated 10%)
  adding: o

In [None]:
shutil.move("outputs_bert.zip", "/content/drive/MyDrive/Thesis_QA_Optimization/Model")

In [None]:
import os
import shutil

# Define the full path to the destination directory
destination_dir = '/content/drive/MyDrive/Thesis_QA_Optimization/Model'
source_file = 'outputs_bert.zip' # This is the file you want to move

# 1. Check if the directory exists and create it if it doesn't
# The `exist_ok=True` argument prevents an error if the directory already exists.
# The `os.makedirs` function creates all intermediate-level directories needed.
os.makedirs(destination_dir, exist_ok=True)

# 2. Now you can safely move the file
try:
    shutil.move(source_file, destination_dir)
    print(f"Successfully moved {source_file} to {destination_dir}")
except FileNotFoundError as e:
    # This might catch a different FileNotFoundError if the source file doesn't exist,
    # but the primary directory issue should be resolved by os.makedirs.
    print(f"Error moving file: {e}")