# Cargar y preparar el dataset

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import torch
import numpy as np
import random
from transformers import set_seed

def fix_all_seeds(seed=42):
    # Semilla para librerías estándar
    random.seed(seed)
    np.random.seed(seed)

    # Semilla para PyTorch (CPU y GPU)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Semilla específica de Transformers (afecta a inicialización de pesos y dropout)
    set_seed(seed)

    # Garantizar determinismo en algoritmos de la GPU (puede ralentizar un poco)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    print(f"Semilla {seed} fijada globalmente.")

fix_all_seeds(42)

Semilla 42 fijada globalmente.


In [None]:
import json
import pandas as pd
from datasets import Dataset, DatasetDict
from sklearn.model_selection import GroupShuffleSplit

def load_and_split_grouped_dataset(path, test_size=0.1, val_size=0.1, seed=42):
    # 1. Cargar el archivo JSONL a un DataFrame de Pandas
    with open(path, 'r', encoding='utf-8') as f:
        data = [json.loads(line) for line in f]
    df = pd.DataFrame(data)

   # 1. División Train vs Test
    gss = GroupShuffleSplit(n_splits=1, test_size=test_size, random_state=seed)
    train_idx, test_idx = next(gss.split(df, groups=df['group_id']))
    df_train_total = df.iloc[train_idx]
    df_test = df.iloc[test_idx]

    # 2. División Train vs Validation
    val_relative_size = val_size / (1 - test_size)
    gss_val = GroupShuffleSplit(n_splits=1, test_size=val_relative_size, random_state=seed)
    train_final_idx, val_idx = next(gss_val.split(df_train_total, groups=df_train_total['group_id']))

    # 3. BARAJADO EXPLÍCITO
    # .sample(frac=1) desordena todas las filas. reset_index evita conflictos de índices.
    train_df = df_train_total.iloc[train_final_idx].sample(frac=1, random_state=seed).reset_index(drop=True)
    val_df = df_train_total.iloc[val_idx].sample(frac=1, random_state=seed).reset_index(drop=True)
    test_df = df_test.sample(frac=1, random_state=seed).reset_index(drop=True)

    return DatasetDict({
        'train': Dataset.from_pandas(train_df),
        'validation': Dataset.from_pandas(val_df),
        'test': Dataset.from_pandas(test_df)
    })

path_dataset = '/content/drive/MyDrive/TFG/multimodal_dataset_lite.jsonl'
dataset = load_and_split_grouped_dataset(path_dataset)

In [None]:
# Comptobacion
train_ids = set(dataset['train']['group_id'])
test_ids = set(dataset['test']['group_id'])

# La intersección debe ser un conjunto vacío
overlap = train_ids.intersection(test_ids)
print(f"Coincidencias de IDs entre train y test: {len(overlap)}")

Coincidencias de IDs entre train y test: 0


In [None]:
from transformers import AutoTokenizer

model_checkpoint = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

def preprocess_function(examples):
    texts = [f"{t} [SEP] {c}" for t, c in zip(examples['title'], examples['title'])]
    tokenized_inputs = tokenizer(texts, truncation=True, padding='max_length', max_length=512)

    # Casting explícito a int para evitar NaNs y errores de interpretación
    #tokenized_inputs["labels"] = [int(label) for label in examples["is_real"]]
    tokenized_inputs["labels"] = examples["is_real"]
    return tokenized_inputs
'''
def preprocess_function(examples):
    # Opcional: Limpiar artefactos para forzar aprendizaje semántico
    clean_contents = [re.sub(r'\n\n|\n|\s{2,}', ' ', c) for c in examples['title']]

    # Combinamos titular y contenido limpio
    texts = [f"{t} [SEP] {c}" for t, c in zip(examples['title'], clean_contents)]
    tokenized_inputs = tokenizer(texts, truncation=True, padding='max_length', max_length=512)
    tokenized_inputs["labels"] = examples["is_real"]
    return tokenized_inputs
'''
tokenized_dataset = dataset.map(preprocess_function, batched=True)
print(f"Dataset features: {tokenized_dataset['train'].features}")

  clean_contents = [re.sub(r'\n\n|\n|\s{2,}', ' ', c) for c in examples['content']]


Map:   0%|          | 0/48 [00:00<?, ? examples/s]

Map:   0%|          | 0/6 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Dataset features: {'group_id': Value('string'), 'is_real': Value('int64'), 'title': Value('string'), 'content': Value('string'), 'image_path': Value('string'), 'model': Value('string'), 'technique': Value('string'), 'input_ids': List(Value('int32')), 'token_type_ids': List(Value('int8')), 'attention_mask': List(Value('int8')), 'labels': Value('int64')}


In [6]:
print(tokenized_dataset['validation']['labels'][:100])

[1, 0, 0, 1, 1, 0]


# Cargar modelo y ejecutar entrenamiento


## Definir funciones de metricas

In [7]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6


In [8]:
import numpy as np
import evaluate

# Cargamos la métrica estándar de clasificación
metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    acc = metric.compute(predictions=predictions, references=labels)["accuracy"]
    f1 = f1_metric.compute(predictions=predictions, references=labels)["f1"]

    return {"accuracy": acc, "f1": f1}

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Para la optimización de hiperparámetros se implementó un Weights & Biases Sweep empleando Optimización Bayesiana. Este enfoque permite modelar la función de pérdida mediante un proceso Gaussiano, optimizando la búsqueda en el espacio de parámetros (Search Space) y convergiendo hacia el mínimo global de forma más eficiente que un muestreo aleatorio (Random Search).

In [11]:
import wandb
import re
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback

sweep_config = {
    'method': 'random',
    'metric': {'name': 'eval_f1', 'goal': 'maximize'},
    'parameters': {
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 1e-6,
            'max': 2e-5
        },
        'weight_decay': {
            'values': [0.01, 0.05, 0.1]
        },
        'num_train_epochs': {
            'values': [3, 5]
        },
        'warmup_ratio': {
            'values': [0.1, 0.15]
        },
        'per_device_train_batch_size': {
            'values': [8, 16]
        }
    }
}

sweep_id = wandb.sweep(sweep_config, project="tfg-deberta-tuning")

def train_iteration():
    with wandb.init():
        config = wandb.config

        id2label = {0: "SYNTHETIC", 1: "REAL"}
        label2id = {"SYNTHETIC": 0, "REAL": 1}

        model = AutoModelForSequenceClassification.from_pretrained(
            "microsoft/deberta-v3-base",
            num_labels=2,
            id2label=id2label,
            label2id=label2id
        )

        args = TrainingArguments(
            output_dir="./temp_checkpoints",
            report_to="wandb",
            learning_rate=config.learning_rate,
            num_train_epochs=config.num_train_epochs,
            per_device_train_batch_size=config.per_device_train_batch_size,
            eval_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True,
            metric_for_best_model="f1"
        )

        trainer = Trainer(
            model=model,
            args=args,
            train_dataset=tokenized_dataset["train"],
            eval_dataset=tokenized_dataset["validation"],
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
        )

        trainer.train()

wandb.agent(sweep_id, function=train_iteration, count=10)

Create sweep with ID: 15zy4s4w
Sweep URL: https://wandb.ai/javierprior04-universidad-de-murcia/tfg-deberta-tuning/sweeps/15zy4s4w


[34m[1mwandb[0m: Agent Starting Run: 5soj9tdv with config:
[34m[1mwandb[0m: 	learning_rate: 1.4621801161577178e-06
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	warmup_ratio: 0.15
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.648941,0.666667,0.666667
2,No log,0.618443,0.666667,0.666667
3,No log,0.595309,0.666667,0.666667


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,▁▁▁
eval/f1,▁▁▁
eval/loss,█▄▁
eval/runtime,█▁▂
eval/samples_per_second,▁█▇
eval/steps_per_second,▁█▇
train/epoch,▁▅██
train/global_step,▁▅██

0,1
eval/accuracy,0.66667
eval/f1,0.66667
eval/loss,0.59531
eval/runtime,0.1507
eval/samples_per_second,39.805
eval/steps_per_second,6.634
total_flos,37888671449088.0
train/epoch,3
train/global_step,18
train_loss,0.58857


[34m[1mwandb[0m: Agent Starting Run: a3zqgw38 with config:
[34m[1mwandb[0m: 	learning_rate: 1.4916867195135689e-05
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	warmup_ratio: 0.15
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.723755,0.5,0.666667
2,No log,0.460266,0.833333,0.8
3,No log,0.358103,0.833333,0.8
4,No log,0.737263,0.5,0.0


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,▁██▁
eval/f1,▇██▁
eval/loss,█▃▁█
eval/runtime,▁█▇█
eval/samples_per_second,█▁▃▁
eval/steps_per_second,█▁▃▁
train/epoch,▁▃▆██
train/global_step,▁▃▆██

0,1
eval/accuracy,0.5
eval/f1,0
eval/loss,0.73726
eval/runtime,0.1502
eval/samples_per_second,39.948
eval/steps_per_second,6.658
total_flos,50518228598784.0
train/epoch,4
train/global_step,24
train_loss,0.73386


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5w9eofp7 with config:
[34m[1mwandb[0m: 	learning_rate: 9.289953609389355e-06
[34m[1mwandb[0m: 	num_train_epochs: 3
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	warmup_ratio: 0.15
[34m[1mwandb[0m: 	weight_decay: 0.05
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.681301,0.5,0.4
2,No log,0.641263,0.666667,0.666667
3,No log,0.621505,0.666667,0.666667


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,▁██
eval/f1,▁██
eval/loss,█▃▁
eval/runtime,▁▁█
eval/samples_per_second,█▇▁
eval/steps_per_second,██▁
train/epoch,▁▅██
train/global_step,▁▅██

0,1
eval/accuracy,0.66667
eval/f1,0.66667
eval/loss,0.62151
eval/runtime,0.1509
eval/samples_per_second,39.756
eval/steps_per_second,6.626
total_flos,37888671449088.0
train/epoch,3
train/global_step,9
train_loss,0.57427


[34m[1mwandb[0m: Agent Starting Run: ztxoli6w with config:
[34m[1mwandb[0m: 	learning_rate: 6.896998319389289e-06
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	warmup_ratio: 0.15
[34m[1mwandb[0m: 	weight_decay: 0.05
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.655588,0.666667,0.666667
2,No log,0.6384,0.666667,0.666667
3,No log,0.590463,0.666667,0.666667


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,▁▁▁
eval/f1,▁▁▁
eval/loss,█▆▁
eval/runtime,▃▁█
eval/samples_per_second,▆█▁
eval/steps_per_second,▆█▁
train/epoch,▁▅██
train/global_step,▁▅██

0,1
eval/accuracy,0.66667
eval/f1,0.66667
eval/loss,0.59046
eval/runtime,0.1504
eval/samples_per_second,39.881
eval/steps_per_second,6.647
total_flos,37888671449088.0
train/epoch,3
train/global_step,9
train_loss,0.58014


[34m[1mwandb[0m: Agent Starting Run: gb110cio with config:
[34m[1mwandb[0m: 	learning_rate: 2.2979725837292763e-06
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	warmup_ratio: 0.15
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.655164,0.666667,0.666667
2,No log,0.651695,0.666667,0.666667
3,No log,0.641751,0.666667,0.666667


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,▁▁▁
eval/f1,▁▁▁
eval/loss,█▆▁
eval/runtime,█▄▁
eval/samples_per_second,▁▅█
eval/steps_per_second,▁▅█
train/epoch,▁▅██
train/global_step,▁▅██

0,1
eval/accuracy,0.66667
eval/f1,0.66667
eval/loss,0.64175
eval/runtime,0.1497
eval/samples_per_second,40.071
eval/steps_per_second,6.678
total_flos,37888671449088.0
train/epoch,3
train/global_step,9
train_loss,0.61983


[34m[1mwandb[0m: Agent Starting Run: ax0p1ewb with config:
[34m[1mwandb[0m: 	learning_rate: 2.4859827295684187e-06
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.655147,0.5,0.4
2,No log,0.649507,0.666667,0.666667
3,No log,0.635924,0.666667,0.666667
4,No log,0.627464,0.666667,0.666667


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,▁███
eval/f1,▁███
eval/loss,█▇▃▁
eval/runtime,█▂▁▁
eval/samples_per_second,▁▇██
eval/steps_per_second,▁▇██
train/epoch,▁▃▆██
train/global_step,▁▃▆██

0,1
eval/accuracy,0.66667
eval/f1,0.66667
eval/loss,0.62746
eval/runtime,0.1521
eval/samples_per_second,39.459
eval/steps_per_second,6.576
total_flos,50518228598784.0
train/epoch,4
train/global_step,12
train_loss,0.57483


[34m[1mwandb[0m: Agent Starting Run: uzq006tq with config:
[34m[1mwandb[0m: 	learning_rate: 1.1134115230143356e-06
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.69415,0.5,0.571429
2,No log,0.682679,0.666667,0.666667
3,No log,0.664219,0.833333,0.857143
4,No log,0.649425,0.833333,0.857143
5,No log,0.645102,0.833333,0.857143


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,▁▅███
eval/f1,▁▃███
eval/loss,█▆▄▂▁
eval/runtime,█▁▁▅▃
eval/samples_per_second,▁██▄▆
eval/steps_per_second,▁██▄▆
train/epoch,▁▃▅▆██
train/global_step,▁▃▅▆██

0,1
eval/accuracy,0.83333
eval/f1,0.85714
eval/loss,0.6451
eval/runtime,0.1524
eval/samples_per_second,39.363
eval/steps_per_second,6.56
total_flos,63147785748480.0
train/epoch,5
train/global_step,30
train_loss,0.52037


[34m[1mwandb[0m: Agent Starting Run: xnsq0s3c with config:
[34m[1mwandb[0m: 	learning_rate: 6.293089481511758e-06
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.1
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.5824,0.833333,0.8
2,No log,0.497143,0.666667,0.5
3,No log,0.258264,1.0,1.0
4,No log,0.262182,0.833333,0.8
5,No log,0.379101,0.833333,0.8


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,▄▁█▄▄
eval/f1,▅▁█▅▅
eval/loss,█▆▁▁▄
eval/runtime,▁█▁▆▁
eval/samples_per_second,█▁█▃█
eval/steps_per_second,█▁█▃█
train/epoch,▁▃▅▆██
train/global_step,▁▃▅▆██

0,1
eval/accuracy,0.83333
eval/f1,0.8
eval/loss,0.3791
eval/runtime,0.1515
eval/samples_per_second,39.607
eval/steps_per_second,6.601
total_flos,63147785748480.0
train/epoch,5
train/global_step,30
train_loss,0.36241


[34m[1mwandb[0m: Agent Starting Run: k497acc9 with config:
[34m[1mwandb[0m: 	learning_rate: 1.8892497303916172e-06
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 8
[34m[1mwandb[0m: 	warmup_ratio: 0.15
[34m[1mwandb[0m: 	weight_decay: 0.01
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.647939,0.666667,0.5
2,No log,0.591244,0.666667,0.5
3,No log,0.516471,1.0,1.0
4,No log,0.48474,0.833333,0.8
5,No log,0.47428,0.833333,0.8


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,▁▁█▄▄
eval/f1,▁▁█▅▅
eval/loss,█▆▃▁▁
eval/runtime,▃█▁▆▂
eval/samples_per_second,▅▁█▃▇
eval/steps_per_second,▅▁█▃▇
train/epoch,▁▃▅▆██
train/global_step,▁▃▅▆██

0,1
eval/accuracy,0.83333
eval/f1,0.8
eval/loss,0.47428
eval/runtime,0.153
eval/samples_per_second,39.225
eval/steps_per_second,6.537
total_flos,63147785748480.0
train/epoch,5
train/global_step,30
train_loss,0.45091


[34m[1mwandb[0m: Agent Starting Run: jfix7gx1 with config:
[34m[1mwandb[0m: 	learning_rate: 3.4525127521082378e-06
[34m[1mwandb[0m: 	num_train_epochs: 5
[34m[1mwandb[0m: 	per_device_train_batch_size: 16
[34m[1mwandb[0m: 	warmup_ratio: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.


Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.652871,0.666667,0.5
2,No log,0.629304,0.5,0.0
3,No log,0.586531,0.666667,0.5


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

0,1
eval/accuracy,█▁█
eval/f1,█▁█
eval/loss,█▆▁
eval/runtime,█▅▁
eval/samples_per_second,▁▄█
eval/steps_per_second,▁▄█
train/epoch,▁▅██
train/global_step,▁▅██

0,1
eval/accuracy,0.66667
eval/f1,0.5
eval/loss,0.58653
eval/runtime,0.1458
eval/samples_per_second,41.163
eval/steps_per_second,6.861
total_flos,37888671449088.0
train/epoch,3
train/global_step,9
train_loss,0.58755


In [None]:
import wandb

api = wandb.Api()

sweep = api.sweep(f"javierprior04-universidad-de-murcia/tfg-deberta-tuning/{sweep_id}")

best_run = sweep.best_run()

print("Mejor configuración encontrada:")
for key, value in best_run.config.items():
    print(f"  {key}: {value}")

[34m[1mwandb[0m: Sorting runs by -summary_metrics.eval_f1


Mejor configuración encontrada:
  bf16: False
  fp16: False
  fsdp: []
  seed: 42
  tf32: None
  debug: []
  dtype: float16
  optim: adamw_torch_fused
  legacy: True
  do_eval: True
  project: huggingface
  use_cpu: False
  do_train: False
  id2label: {'0': 'SYNTHETIC', '1': 'REAL'}
  label2id: {'REAL': 1, 'SYNTHETIC': 0}
  run_name: None
  data_seed: None
  deepspeed: None
  hub_token: <HUB_TOKEN>
  log_level: passive
  max_steps: -1
  report_to: ['wandb']
  use_cache: False
  adam_beta1: 0.9
  adam_beta2: 0.999
  do_predict: False
  eval_delay: 0
  eval_steps: None
  hidden_act: gelu
  local_rank: -1
  model_type: deberta-v2
  optim_args: None
  output_dir: ./temp_checkpoints
  save_steps: 500
  vocab_size: 128100
  ddp_backend: None
  ddp_timeout: 1800
  fsdp_config: {'xla': False, 'xla_fsdp_v2': False, 'min_num_params': 0, 'xla_fsdp_grad_ckpt': False}
  hidden_size: 768
  label_names: None
  logging_dir: None
  push_to_hub: False
  return_dict: True
  adam_epsilon: 1e-08
  bos_toke

## Entrenar con los mejores hyperparametros

In [None]:
# --- CELDA DE ENTRENAMIENTO FINAL ---

# 1. Configura aquí los mejores parámetros encontrados por el Sweep
best_config = {
    "learning_rate": 1.4621801161577178e-06,
    "num_train_epochs": 5,
    "batch_size": 8
}

id2label = {0: "SYNTHETIC", 1: "REAL"}
label2id = {"SYNTHETIC": 0, "REAL": 1}

model_final = AutoModelForSequenceClassification.from_pretrained(
    "microsoft/deberta-v3-base",
    num_labels=2,
    id2label=id2label,
    label2id=label2id
)

final_args = TrainingArguments(
    seed=42,
    data_seed=42,
    full_determinism=True,
    output_dir="./resultados_finales",
    learning_rate=best_config["learning_rate"],
    num_train_epochs=best_config["num_train_epochs"],
    per_device_train_batch_size=best_config["batch_size"],
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    fp16=False
)

trainer = Trainer(
    model=model_final,
    args=final_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

trainer.train()

eval_test = trainer.evaluate(tokenized_dataset["test"])
print(f"Resultados en el conjunto de TEST: {eval_test}")

import os

save_path = "/content/drive/MyDrive/TFG/modelos/deberta_v1_sin_limpieza"
# save_path = "/content/drive/MyDrive/TFG/modelos/deberta_v1_con_limpieza"

trainer.save_model(save_path)

Loading weights:   0%|          | 0/198 [00:00<?, ?it/s]

DebertaV2ForSequenceClassification LOAD REPORT from: microsoft/deberta-v3-base
Key                                     | Status     | 
----------------------------------------+------------+-
mask_predictions.classifier.weight      | UNEXPECTED | 
lm_predictions.lm_head.dense.bias       | UNEXPECTED | 
mask_predictions.LayerNorm.bias         | UNEXPECTED | 
lm_predictions.lm_head.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.bias   | UNEXPECTED | 
lm_predictions.lm_head.dense.weight     | UNEXPECTED | 
mask_predictions.dense.weight           | UNEXPECTED | 
mask_predictions.LayerNorm.weight       | UNEXPECTED | 
mask_predictions.classifier.bias        | UNEXPECTED | 
mask_predictions.dense.bias             | UNEXPECTED | 
lm_predictions.lm_head.LayerNorm.weight | UNEXPECTED | 
classifier.bias                         | MISSING    | 
classifier.weight                       | MISSING    | 
pooler.dense.bias                       | MISSING    | 
pooler.dense.weight      

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.642139,0.666667,0.666667
2,No log,0.624608,0.666667,0.666667
3,No log,0.605926,0.833333,0.857143
4,No log,0.589577,0.833333,0.857143
5,No log,0.583717,0.833333,0.857143


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['deberta.embeddings.LayerNorm.weight', 'deberta.embeddings.LayerNorm.bias', 'deberta.encoder.layer.0.attention.output.LayerNorm.weight', 'deberta.encoder.layer.0.attention.output.LayerNorm.bias', 'deberta.encoder.layer.0.output.LayerNorm.weight', 'deberta.encoder.layer.0.output.LayerNorm.bias', 'deberta.encoder.layer.1.attention.output.LayerNorm.weight', 'deberta.encoder.layer.1.attention.output.LayerNorm.bias', 'deberta.encoder.layer.1.output.LayerNorm.weight', 'deberta.encoder.layer.1.output.LayerNorm.bias', 'deberta.encoder.layer.2.attention.output.LayerNorm.weight', 'deberta.encoder.layer.2.attention.output.LayerNorm.bias', 'deberta.encoder.layer.2.output.LayerNorm.weight', 'deberta.encoder.layer.2.output.LayerNorm.bias', 'deberta.encoder.layer.3.attention.output.LayerNorm.weight', 'deberta.encoder.layer.3.attention.output.LayerNorm.bias', 'deberta.encoder.layer.3.output.LayerNorm.weight', 'deberta.encoder.layer.3.output.Laye

Resultados en el conjunto de TEST: {'eval_loss': 0.4825645089149475, 'eval_accuracy': 0.875, 'eval_f1': 0.8571428571428571, 'eval_runtime': 0.2052, 'eval_samples_per_second': 38.98, 'eval_steps_per_second': 4.872, 'epoch': 5.0}


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]