# Paquetes

In [None]:
import os
import json
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
from google.colab import drive
from collections import Counter
from datasets import Dataset, concatenate_datasets, load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, TaskType, PeftModel
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq, AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig

# Cargar datos

- Montamos un Drive para acceder a los archivos.

- Definimos una función para cargar datasets en formato JSON.

- Cargamos 3 conjuntos de entrenamiento (*train1, train2, train3*), 3 de validación (*val1, val2, val3*) y 3 de prueba (*test1, test2, test3*).

In [None]:
# Mount Google Drive under /content/drive so the dataset files referenced below can be read.
# force_remount=True is passed so the mount is redone even when a previous one is still around.
drive.mount('/content/drive', force_remount=True)

# Load one dataset from a .txt file that holds one JSON object per line
def cargar_dataset(ruta):
    """Read the JSON-lines file at `ruta` with pandas and wrap the resulting frame in a `Dataset`."""
    registros = pd.read_json(ruta, lines=True)
    return Dataset.from_pandas(registros)

# Paths: every file lives in the same Drive folder and is named "<subset><n>.txt"
base_path = "/content/drive/MyDrive/Colab Notebooks"

# Key order is train1, val1, test1, train2, ... (split number outermost)
rutas = {
    f"{subset}{numero}": f"{base_path}/{subset}{numero}.txt"
    for numero in (1, 2, 3)
    for subset in ("train", "val", "test")
}

# Load every dataset, keyed by the same names used in `rutas`
datasets = dict(zip(rutas, map(cargar_dataset, rutas.values())))

Mounted at /content/drive


In [None]:
# Sanity check that the files were read: one row count per line,
# listed as train1-3, then val1-3, then test1-3
print(
    *(
        len(datasets[f"{subset}{numero}"])
        for subset in ("train", "val", "test")
        for numero in (1, 2, 3)
    ),
    sep="\n",
)

9467
3763
4572
1812
2553
1877
2553
1877
640


In [None]:
def imprimir_conteo(dataset_dict, split_names=None, output_col="output"):
    """
    Print how often each value of one column occurs, dataset by dataset, grouped by split.

    Args:
        dataset_dict: Mapping from dataset name (e.g. "train1", "val1") to a dataset that can be
                      indexed by column name.
        split_names: List of (group_label, [dataset_names]) tuples. When None, the default
                     Train / Validation / Test grouping over splits 1-3 is used.
        output_col: Name of the column whose values are counted (default "output").
    """
    separador = "-" * 40

    grupos = split_names
    if grupos is None:
        # Default grouping: three numbered datasets per subset
        grupos = [
            ("Train", ["train1", "train2", "train3"]),
            ("Validation", ["val1", "val2", "val3"]),
            ("Test", ["test1", "test2", "test3"])
        ]

    for etiqueta, nombres in grupos:
        print(f"{etiqueta}:")
        print(separador)

        for nombre in nombres:
            # Names missing from the mapping are reported instead of raising
            if nombre not in dataset_dict:
                print(f"{nombre}: No encontrado en el diccionario de datasets")
                print(separador)
                continue

            conteo = Counter(dataset_dict[nombre][output_col])
            print(f"{nombre}:")
            # Sorted by label so the listing order is stable
            print(pd.Series(conteo).sort_index())
            print(separador)

        print("=" * 40)

# Label distribution of every dataset as loaded, before any balancing.
imprimir_conteo(datasets)

Train:
----------------------------------------
train1:
no     7749
yes    1718
dtype: int64
----------------------------------------
train2:
no     2486
yes    1277
dtype: int64
----------------------------------------
train3:
no     3427
yes    1145
dtype: int64
----------------------------------------
Validation:
----------------------------------------
val1:
no     1294
yes     518
dtype: int64
----------------------------------------
val2:
no     2019
yes     534
dtype: int64
----------------------------------------
val3:
no     1485
yes     392
dtype: int64
----------------------------------------
Test:
----------------------------------------
test1:
no     2019
yes     534
dtype: int64
----------------------------------------
test2:
no     1485
yes     392
dtype: int64
----------------------------------------
test3:
no     351
yes    289
dtype: int64
----------------------------------------


In [None]:
def balance_yes_no_dataset(dataset_dict, dataset_name, output_col="output", n_yes=None, random_state=33):
    """
    Downsample one dataset so it holds the same number of "yes" and "no" rows.

    Labels are compared after stripping whitespace and lower-casing. Rows whose label is neither
    "yes" nor "no" are dropped from the balanced result.

    Args:
        dataset_dict: Dictionary of datasets; the entry `dataset_name` is replaced in place.
        dataset_name: Key of the dataset to balance (e.g. "train1").
        output_col: Column holding the "yes"/"no" labels (default "output").
        n_yes: Number of rows to keep per class. None keeps as many as the smaller class has.
        random_state: Seed used for both the sampling and the final shuffle (default 33).

    Returns:
        The balanced dataset (the same object that is stored back into `dataset_dict`).

    Raises:
        ValueError: If `n_yes` is negative or larger than the number of available "yes" or "no"
            rows. The dictionary is left untouched in that case.
    """
    df = dataset_dict[dataset_name].to_pandas()

    # Normalise the labels once and reuse the result for both class filters
    etiquetas = df[output_col].str.strip().str.lower()
    df_yes = df[etiquetas == "yes"]
    df_no = df[etiquetas == "no"]

    disponibles = min(len(df_yes), len(df_no))
    if n_yes is None:
        n_yes = disponibles
    elif n_yes < 0 or n_yes > disponibles:
        # Fail early with the actual class sizes instead of pandas' generic sampling error
        raise ValueError(
            f"n_yes={n_yes} no es válido para '{dataset_name}': "
            f"hay {len(df_yes)} 'yes' y {len(df_no)} 'no' disponibles."
        )

    # Same seed for both draws keeps the selection reproducible
    df_yes_sample = df_yes.sample(n=n_yes, random_state=random_state)
    df_no_sample = df_no.sample(n=n_yes, random_state=random_state)

    # Recombine, shuffle, and reset to a plain 0..N-1 index
    balanced_df = pd.concat([df_yes_sample, df_no_sample]).sample(
        frac=1, random_state=random_state).reset_index(drop=True)

    # Back to a Dataset, overwriting the original entry
    dataset_dict[dataset_name] = Dataset.from_pandas(balanced_df)

    return dataset_dict[dataset_name]

In [None]:
# Downsample each training split in place to 250 "yes" + 250 "no" rows.
# Only the last call's return value is shown as the cell output.
balance_yes_no_dataset(datasets, "train1", n_yes=250)
balance_yes_no_dataset(datasets, "train2", n_yes=250)
balance_yes_no_dataset(datasets, "train3", n_yes=250)

Dataset({
    features: ['instruction', 'output'],
    num_rows: 500
})

In [None]:
# Label distribution again: only the train entries were rebalanced, val/test are unchanged.
imprimir_conteo(datasets)

Train:
----------------------------------------
train1:
no     250
yes    250
dtype: int64
----------------------------------------
train2:
no     250
yes    250
dtype: int64
----------------------------------------
train3:
no     250
yes    250
dtype: int64
----------------------------------------
Validation:
----------------------------------------
val1:
no     1294
yes     518
dtype: int64
----------------------------------------
val2:
no     2019
yes     534
dtype: int64
----------------------------------------
val3:
no     1485
yes     392
dtype: int64
----------------------------------------
Test:
----------------------------------------
test1:
no     2019
yes     534
dtype: int64
----------------------------------------
test2:
no     1485
yes     392
dtype: int64
----------------------------------------
test3:
no     351
yes    289
dtype: int64
----------------------------------------


# Tokenización

- Cargamos el tokenizador de *FLAN-T5*.

- Convertimos el texto en tokens numéricos.

- Ajustamos una longitud fija (128 para inputs, 2 para outputs).

In [None]:
# Checkpoint name: used for the tokenizer here and as the base model in the training and
# evaluation functions below.
model_id = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)

def tokenize(example, max_input_length=128, max_target_length=2):
    """
    Tokenize one example into fixed-length model inputs and labels.

    Args:
        example: Mapping with the text fields "instruction" (model input) and "output" (target).
        max_input_length: Length the instruction is truncated/padded to (default 128).
        max_target_length: Length the target is truncated/padded to (default 2).

    Returns:
        The tokenizer output for the instruction plus a "labels" entry with the target token
        ids, where padding positions are replaced by -100.
    """
    model_inputs = tokenizer(
        example["instruction"],
        truncation=True,
        padding="max_length",
        max_length=max_input_length
    )
    labels = tokenizer(
        example["output"],
        truncation=True,
        padding="max_length",
        max_length=max_target_length
    )
    # Padding in the labels must not contribute to the loss: -100 is the label value the
    # Hugging Face seq2seq loss ignores. Assumes one example per call (flat list of ids),
    # which is how `.map(tokenize)` is used in this notebook.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        -100 if token_id == pad_id else token_id
        for token_id in labels["input_ids"]
    ]
    return model_inputs

# Apply `tokenize` to every dataset in `datasets`, keeping the same keys.
tokenized_datasets = {k: v.map(tokenize) for k, v in datasets.items()}

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/1812 [00:00<?, ? examples/s]

Map:   0%|          | 0/2553 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/2553 [00:00<?, ? examples/s]

Map:   0%|          | 0/1877 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/1877 [00:00<?, ? examples/s]

Map:   0%|          | 0/640 [00:00<?, ? examples/s]

# Configuración de entrenamiento y LoRA

- Definimos hiperparámetros de entrenamiento (batch size, épocas, ...).

- Configuramos LoRA para un fine-tuning eficiente.

- Con la función de entrenar_y_guardar:

  - Entrenamos un modelo separado para cada conjunto de entrenamiento.
  - Guardamos cada modelo ajustado en disco.

In [None]:
# LoRA and training configuration, shared by every training run in this notebook
# NOTE(review): in the recorded runs the training loss plateaus around 0.37-0.40 and the
# fine-tuned models score about 0.5 balanced accuracy; learning_rate=2e-5 may be too low
# for LoRA training — confirm before relying on these results.
training_args = TrainingArguments(
    output_dir="./checkpoints",
    per_device_train_batch_size=6,
    num_train_epochs=6,
    learning_rate=2e-5,
    logging_steps=50,
    save_strategy="no",             # no intermediate checkpoints; adapters are saved explicitly after training
    report_to="none"
)

data_collator = DataCollatorForSeq2Seq(tokenizer)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,                  # twice the rank r
    # FLAN-T5 uses a gated feed-forward whose input projections are named "wi_0" and "wi_1";
    # "wi" alone does not match them, so they are listed explicitly. "wi" is kept so the
    # same config still covers non-gated T5 checkpoints.
    target_modules=["q", "k", "v", "o", "wi", "wi_0", "wi_1", "wo"],
    lora_dropout=0.0,               # dropout disabled; author's rationale: the dataset is small
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

In [None]:
def preparar_modelo_lora():
    """
    Build a fresh model for training: load the base checkpoint `model_id` on CPU in float32
    and wrap it with the adapters described by `lora_config`.
    """
    opciones_carga = {
        "device_map": "cpu",
        "torch_dtype": torch.float32,
        "low_cpu_mem_usage": True,
    }
    return get_peft_model(
        AutoModelForSeq2SeqLM.from_pretrained(model_id, **opciones_carga),
        lora_config,
    )

def entrenar_y_guardar(dataset, output_path):
    """
    Train a new LoRA model on `dataset` and save it, unless `output_path` already exists.

    Args:
        dataset: Tokenized training dataset handed to the `Trainer`.
        output_path: Directory where the model and tokenizer are saved. If it already exists,
            nothing is trained and the function only prints a notice.

    Returns:
        None.
    """
    if not os.path.exists(output_path):
        print(f"Entrenando modelo y guardando en {output_path}")
        modelo_peft = preparar_modelo_lora()

        Trainer(
            model=modelo_peft,
            args=training_args,
            train_dataset=dataset,
            tokenizer=tokenizer,
            data_collator=data_collator,
        ).train()

        # Per the original author's note, this stores only the LoRA adapters
        modelo_peft.save_pretrained(output_path)
        tokenizer.save_pretrained(output_path)
        return None

    # An existing directory is treated as "already trained"
    print(f"Ya existe el modelo en {output_path}, se omite entrenamiento.")
    return None

# Evaluación

- Evaluamos el modelo en ejemplos de prueba.

- Comparamos modelos base vs. modelos con fine-tuning (LoRA).

- Calculamos distintas métricas (accuracy, precision, recall, ...).

In [None]:
def _leer_ejemplos_jsonl(ruta, max_ejemplos):
    """Read the JSON objects found in the first `max_ejemplos` lines of a JSON-lines file, skipping blank lines."""
    ejemplos = []
    with open(ruta, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if i >= max_ejemplos:
                break
            if line.strip():
                ejemplos.append(json.loads(line))
    return ejemplos


def _balancear_ejemplos(ejemplos, random_state):
    """Downsample to equally many "yes" and "no" examples; returns (shuffled examples, n per class)."""
    yes_samples = [ej for ej in ejemplos if ej["output"].strip().lower() == "yes"]
    no_samples = [ej for ej in ejemplos if ej["output"].strip().lower() == "no"]
    n = min(len(yes_samples), len(no_samples))
    rng = np.random.default_rng(seed=random_state)
    yes_sampled = rng.choice(yes_samples, size=n, replace=False).tolist()
    no_sampled = rng.choice(no_samples, size=n, replace=False).tolist()
    balanceados = yes_sampled + no_sampled
    rng.shuffle(balanceados)
    return balanceados, n


def _prediccion_a_etiqueta(pred_raw):
    """Map generated text to "yes"/"no": "yes" if it contains "yes" or "sí" or is empty, otherwise "no"."""
    texto = pred_raw.strip().lower()
    # NOTE(review): an empty generation is counted as "yes" (rule kept from the original
    # code); this inflates recall if the model emits nothing — confirm it is intended.
    if not texto or "yes" in texto or "sí" in texto:
        return "yes"
    return "no"


def _calcular_metricas(y_true, y_pred):
    """Compute the classification metrics and confusion counts for binary 0/1 label lists."""
    pares = list(zip(y_true, y_pred))
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, zero_division=0),
        "recall": recall_score(y_true, y_pred, zero_division=0),
        "f1_score": f1_score(y_true, y_pred, zero_division=0),
        "balanced_accuracy": balanced_accuracy_score(y_true, y_pred),
        "tp": sum(1 for yt, yp in pares if yt == 1 and yp == 1),
        "fp": sum(1 for yt, yp in pares if yt == 0 and yp == 1),
        "fn": sum(1 for yt, yp in pares if yt == 1 and yp == 0),
        "tn": sum(1 for yt, yp in pares if yt == 0 and yp == 0),
    }


def evaluar_modelo_y_predicciones(nombre_modelo, rutas_txt, max_ejemplos=10000,
                                   imprimir_predicciones=False, modo="lora",
                                   balanced_dataset=False, random_state=33,
                                   devolver_detalle=False):
    """
    Generate a yes/no prediction for every example of one or more JSON-lines files and score them.

    Args:
        nombre_modelo: With modo="lora", name of the directory (relative to ".") holding the
            saved adapter and tokenizer. Not used to load anything in any other mode.
        rutas_txt: A path or a list of paths to JSON-lines files whose objects carry the
            fields "instruction" and "output".
        max_ejemplos: Only the first `max_ejemplos` lines of each file are read.
        imprimir_predicciones: When True, print every input, raw prediction and true label.
        modo: "lora" loads the base model plus the saved adapter; any other value evaluates
            the plain base model `model_id`.
        balanced_dataset: When True, evaluate on a random subset with as many "yes" as "no".
        random_state: Seed for the balancing subsample.
        devolver_detalle: When True, return the per-file detail dictionary instead of the
            metrics of the last file.

    Returns:
        By default the metrics dict of the last evaluated file (the only one when a single
        path is given). With devolver_detalle=True, a dict keyed by file base name with
        "inputs", "y_true", "y_pred", "y_pred_raw" and "metricas".

    Raises:
        ValueError: If `rutas_txt` contains no path.
    """
    if isinstance(rutas_txt, str):
        rutas_txt = [rutas_txt]
    rutas_txt = list(rutas_txt)
    if not rutas_txt:
        # Checked before loading any model so the mistake surfaces immediately
        raise ValueError("rutas_txt debe contener al menos una ruta de archivo.")

    base_model_id = model_id
    ruta_adaptador = f"./{nombre_modelo}"
    tokenizer = AutoTokenizer.from_pretrained(
        ruta_adaptador if modo == "lora" else base_model_id
    )
    model = AutoModelForSeq2SeqLM.from_pretrained(base_model_id).to("cpu")
    if modo == "lora":
        model = PeftModel.from_pretrained(model, ruta_adaptador).to("cpu")

    resultados = {}
    metricas = None

    for ruta in rutas_txt:
        nombre_base = os.path.splitext(os.path.basename(ruta))[0]
        inputs = _leer_ejemplos_jsonl(ruta, max_ejemplos)

        if balanced_dataset:
            inputs, n = _balancear_ejemplos(inputs, random_state)
            print(f"[{nombre_base}] Balanceado a {n} 'yes' y {n} 'no' (total {2*n})")

        y_true_str, y_pred_str, y_pred_raw_str = [], [], []

        for ejemplo in tqdm(inputs, desc=f"Evaluando {nombre_base} ({modo})", unit="ejemplo"):
            frase = ejemplo["instruction"]
            real = ejemplo["output"].strip().lower()
            # NOTE(review): the prompt is not truncated here, while the `tokenize` function
            # used for training truncates to 128 tokens — confirm the mismatch is acceptable.
            encoded = tokenizer(frase, return_tensors="pt", return_token_type_ids=False).to(model.device)
            # Greedy decoding: single beam, no sampling
            output_ids = model.generate(**encoded, max_new_tokens=10, num_beams=1, do_sample=False)
            pred_raw = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip().lower()

            y_true_str.append(real)
            y_pred_str.append(_prediccion_a_etiqueta(pred_raw))
            y_pred_raw_str.append(pred_raw)

            if imprimir_predicciones:
                print(f"Entrada: {frase}")
                print(f"→ Predicción raw: {pred_raw}")
                print(f"→ Etiqueta real: {real}\n")

        # Any true label other than "yes" is scored as the negative class
        y_true = [1 if y == "yes" else 0 for y in y_true_str]
        y_pred = [1 if y == "yes" else 0 for y in y_pred_str]
        metricas = _calcular_metricas(y_true, y_pred)

        resultados[nombre_base] = {
            "inputs": inputs,
            "y_true": y_true_str,
            "y_pred": y_pred_str,
            "y_pred_raw": y_pred_raw_str,
            "metricas": metricas
        }

    return resultados if devolver_detalle else metricas

def evaluar_split_completo(split, evaluar_llm=True, entrenar_lora=True, continuar_lora=True,
                           evaluar_lora_train=True, evaluar_lora_val=True, evaluar_lora_test=True,
                           balancear_val=True, evaluar_balanceado=False, n_yes_balanceo=250):
    """
    Run the selected stages of the experiment for one split and collect the metrics.

    The stages always run in this order, each only when its flag is True:
      1. evaluar_llm: evaluate the base model on the train/val/test files.
      2. entrenar_lora: train and save a model on the tokenized train set.
      3. evaluar_lora_train / evaluar_lora_val: evaluate that model on the train / val file.
      4. continuar_lora: optionally balance val, re-tokenize it, and train and save a model on
         train + val.
      5. evaluar_lora_test: evaluate the train+val model on the test file.

    Args:
        split: Split identifier, used to build dataset keys ("train{split}", ...) and model
            directory names.
        balancear_val: In stage 4, balance the val dataset with `balance_yes_no_dataset`
            before tokenizing it.
        evaluar_balanceado: Forwarded as `balanced_dataset` to every evaluation.
        n_yes_balanceo: Rows per class requested when balancing val.

    Returns:
        Dict mapping stage keys ("llm_train", "llm_val", "llm_test", "lora_train", "lora_val",
        "lora_test") to whatever `evaluar_modelo_y_predicciones` returned for that stage.
        Stages that were disabled or skipped have no entry.
    """
    nombre_train = f"modelo_split{split}_train"
    nombre_trainval = f"modelo_split{split}_trainval"
    resultados = {}

    def _evaluar(clave, nombre_modelo, subset, modo):
        # Evaluate one model on one subset file of this split and store the result under `clave`
        resultados[clave] = evaluar_modelo_y_predicciones(
            nombre_modelo=nombre_modelo,
            rutas_txt=rutas[f"{subset}{split}"],
            modo=modo,
            balanced_dataset=evaluar_balanceado
        )

    def _evaluar_lora(clave, nombre_modelo, subset, aviso_faltante):
        # A saved model is required; otherwise only print the notice and move on
        if os.path.exists(f"./{nombre_modelo}"):
            _evaluar(clave, nombre_modelo, subset, "lora")
        else:
            print(aviso_faltante)

    # === Base model (no fine-tuning) ===
    if evaluar_llm:
        print(f"\n[LLM] Evaluando modelo base sin entrenamiento en split {split}")
        for subset in ("train", "val", "test"):
            _evaluar(f"llm_{subset}", model_id, subset, "base")

    # === LoRA training on train ===
    if entrenar_lora:
        print(f"\n[LoRA] Entrenando modelo con train para split {split}")
        entrenar_y_guardar(
            dataset=tokenized_datasets[f"train{split}"],
            output_path=f"./{nombre_train}"
        )

    # === Train-only model evaluated on train and on val ===
    if evaluar_lora_train:
        print("\n[LoRA] Evaluando modelo LoRA (train) en train")
        _evaluar_lora(
            "lora_train", nombre_train, "train",
            f"No existe el modelo {nombre_train}, se omite esta evaluación."
        )

    if evaluar_lora_val:
        print("\n[LoRA] Evaluando modelo LoRA (train) en val")
        _evaluar_lora(
            "lora_val", nombre_train, "val",
            f"No existe el modelo {nombre_train}, se omite esta evaluación."
        )

    # === LoRA training on train + val ===
    if continuar_lora:
        print(f"\n[LoRA] Entrenando modelo final (train + val) para split {split}")
        nombre_val = f"val{split}"

        if not balancear_val:
            print("No se ha balanceado el conjunto de validación")
        else:
            # Replaces the val entry of the module-level `datasets` dict
            balance_yes_no_dataset(datasets, nombre_val, n_yes=n_yes_balanceo)
            print(f"Dataset val balanceado con {n_yes_balanceo} 'yes' y 'no'")

        # Re-tokenize val so the tokenized copy matches the (possibly balanced) dataset
        tokenized_datasets[nombre_val] = datasets[nombre_val].map(tokenize)

        dataset_combinado = concatenate_datasets([
            tokenized_datasets[f"train{split}"],
            tokenized_datasets[nombre_val]
        ])
        print(f"Tamaño total de train+val: {len(dataset_combinado)}")

        entrenar_y_guardar(
            dataset=dataset_combinado,
            output_path=f"./{nombre_trainval}"
        )

    # === Train+val model evaluated on test ===
    if evaluar_lora_test:
        print("\n[LoRA] Evaluando modelo final (train+val) en test")
        _evaluar_lora(
            "lora_test", nombre_trainval, "test",
            f" No existe el modelo {nombre_trainval}, se omite esta evaluación."
        )

    return resultados

# Resultados

## LLM

### Split 1

In [None]:
# Baseline for split 1: evaluate the base model (no fine-tuning) on the train/val/test files.
# Every LoRA stage is disabled.
resultados_split1_llm = evaluar_split_completo(split=1, evaluar_llm=True, entrenar_lora=False,
                                               continuar_lora=False, evaluar_lora_train=False,
                                               evaluar_lora_val=False, evaluar_lora_test=False)


[LLM] Evaluando modelo base sin entrenamiento en split 1


Evaluando train1 (base): 100%|██████████| 9467/9467 [1:18:17<00:00,  2.02ejemplo/s]
Evaluando val1 (base): 100%|██████████| 1812/1812 [14:56<00:00,  2.02ejemplo/s]
Evaluando test1 (base): 100%|██████████| 2553/2553 [20:54<00:00,  2.04ejemplo/s]


In [None]:
resultados_split1_llm

{'llm_train': {'accuracy': 0.8160980247174395,
  'precision': 0.04,
  'recall': 0.0005820721769499418,
  'f1_score': 0.0011474469305794606,
  'balanced_accuracy': np.float64(0.4987424491740344),
  'tp': 1,
  'fp': 24,
  'fn': 1717,
  'tn': 7725},
 'llm_val': {'accuracy': 0.7091611479028698,
  'precision': 0.0,
  'recall': 0.0,
  'f1_score': 0.0,
  'balanced_accuracy': np.float64(0.4965224111282844),
  'tp': 0,
  'fp': 9,
  'fn': 518,
  'tn': 1285},
 'llm_test': {'accuracy': 0.790834312573443,
  'precision': 0.0,
  'recall': 0.0,
  'f1_score': 0.0,
  'balanced_accuracy': np.float64(0.5),
  'tp': 0,
  'fp': 0,
  'fn': 534,
  'tn': 2019}}

### Split 2

In [None]:
# Baseline for split 2: evaluate the base model (no fine-tuning) on the train/val/test files.
# Every LoRA stage is disabled.
resultados_split2_llm = evaluar_split_completo(split=2, evaluar_llm=True, entrenar_lora=False,
                                               continuar_lora=False, evaluar_lora_train=False,
                                               evaluar_lora_val=False, evaluar_lora_test=False)


[LLM] Evaluando modelo base sin entrenamiento en split 2


Evaluando train2 (base): 100%|██████████| 3763/3763 [54:17<00:00,  1.16ejemplo/s]
Evaluando val2 (base): 100%|██████████| 2553/2553 [36:34<00:00,  1.16ejemplo/s]
Evaluando test2 (base): 100%|██████████| 1877/1877 [26:45<00:00,  1.17ejemplo/s]


In [None]:
resultados_split2_llm

{'llm_train': {'accuracy': 0.6579856497475418,
  'precision': 0.0,
  'recall': 0.0,
  'f1_score': 0.0,
  'balanced_accuracy': np.float64(0.49798873692679),
  'tp': 0,
  'fp': 10,
  'fn': 1277,
  'tn': 2476},
 'llm_val': {'accuracy': 0.790834312573443,
  'precision': 0.0,
  'recall': 0.0,
  'f1_score': 0.0,
  'balanced_accuracy': np.float64(0.5),
  'tp': 0,
  'fp': 0,
  'fn': 534,
  'tn': 2019},
 'llm_test': {'accuracy': 0.7911561001598295,
  'precision': 0.0,
  'recall': 0.0,
  'f1_score': 0.0,
  'balanced_accuracy': np.float64(0.5),
  'tp': 0,
  'fp': 0,
  'fn': 392,
  'tn': 1485}}

### Split 3

In [None]:
# Baseline for split 3: evaluate the base model (no fine-tuning) on the train/val/test files.
# Every LoRA stage is disabled.
resultados_split3_llm = evaluar_split_completo(split=3, evaluar_llm=True, entrenar_lora=False,
                                               continuar_lora=False, evaluar_lora_train=False,
                                               evaluar_lora_val=False, evaluar_lora_test=False)


[LLM] Evaluando modelo base sin entrenamiento en split 3


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Evaluando train3 (base): 100%|██████████| 4572/4572 [37:28<00:00,  2.03ejemplo/s]
Evaluando val3 (base): 100%|██████████| 1877/1877 [15:11<00:00,  2.06ejemplo/s]
Evaluando test3 (base): 100%|██████████| 640/640 [05:10<00:00,  2.06ejemplo/s]


In [None]:
resultados_split3_llm

{'llm_train': {'accuracy': 0.747594050743657,
  'precision': 0.0,
  'recall': 0.0,
  'f1_score': 0.0,
  'balanced_accuracy': np.float64(0.4986868981616574),
  'tp': 0,
  'fp': 9,
  'fn': 1145,
  'tn': 3418},
 'llm_val': {'accuracy': 0.7911561001598295,
  'precision': 0.0,
  'recall': 0.0,
  'f1_score': 0.0,
  'balanced_accuracy': np.float64(0.5),
  'tp': 0,
  'fp': 0,
  'fn': 392,
  'tn': 1485},
 'llm_test': {'accuracy': 0.5484375,
  'precision': 0.0,
  'recall': 0.0,
  'f1_score': 0.0,
  'balanced_accuracy': np.float64(0.5),
  'tp': 0,
  'fp': 0,
  'fn': 289,
  'tn': 351}}

## LoRA

### Split 1

#### Train

In [None]:
# Training only: fit the LoRA model on train1 and save it under ./modelo_split1_train.
# No evaluation stage is enabled, so the returned dict is empty.
entrenamiento_split1 = evaluar_split_completo(split=1, evaluar_llm=False, entrenar_lora=True,
                                                      continuar_lora=False, evaluar_lora_train=False,
                                                      evaluar_lora_val=False, evaluar_lora_test=False)


[LoRA] Entrenando con train para split 1
Entrenando modelo y guardando en ./modelo_split1_train


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
50,1.7496
100,1.0317
150,0.5624
200,0.4464
250,0.393
300,0.4001
350,0.4132
400,0.3848
450,0.3979
500,0.3719


In [None]:
# Evaluate the train-only model of split 1 on the train1 file (read in full from disk, not
# the balanced in-memory subset). entrenar_lora=True is harmless here: training is skipped
# when the model directory already exists.
resultados_split1_lora_train = evaluar_split_completo(split=1, evaluar_llm=False, entrenar_lora=True,
                                                      continuar_lora=False, evaluar_lora_train=True,
                                                      evaluar_lora_val=False, evaluar_lora_test=False)


[LoRA] Entrenando con train para split 1
Ya existe el modelo en ./modelo_split1_train, se omite entrenamiento.

[LoRA] Evaluando modelo entrenado con train en train


Evaluando train1 (lora): 100%|██████████| 9467/9467 [1:40:35<00:00,  1.57ejemplo/s]


In [None]:
resultados_split1_lora_train

{'lora_train': {'accuracy': 0.1816837435301574,
  'precision': 0.1815108293713682,
  'recall': 1.0,
  'f1_score': 0.30725207904855584,
  'balanced_accuracy': np.float64(0.5001290489095367),
  'tp': 1718,
  'fp': 7747,
  'fn': 0,
  'tn': 2}}

#### Validation

In [None]:
# Evaluate the train-only model of split 1 on the val1 file. Training is skipped when the
# model directory already exists.
resultados_split1_lora_val = evaluar_split_completo(split=1, evaluar_llm=False, entrenar_lora=True,
                                                      continuar_lora=False, evaluar_lora_train=False,
                                                      evaluar_lora_val=True, evaluar_lora_test=False)


[LoRA] Entrenando con train para split 1
Ya existe el modelo en ./modelo_split1_train, se omite entrenamiento.

[LoRA] Evaluando modelo entrenado con train en val


Evaluando val1 (lora): 100%|██████████| 1812/1812 [19:10<00:00,  1.57ejemplo/s]


In [None]:
resultados_split1_lora_val

{'lora_val': {'accuracy': 0.2858719646799117,
  'precision': 0.2858719646799117,
  'recall': 1.0,
  'f1_score': 0.4446351931330472,
  'balanced_accuracy': np.float64(0.5),
  'tp': 518,
  'fp': 1294,
  'fn': 0,
  'tn': 0}}

#### Test

In [None]:
# Final model for split 1: balance val1 (replacing it in `datasets`), train a new model on
# train1 + val1 and evaluate it on the test1 file. The train+val model is built from the base
# checkpoint again; it does not continue from the train-only adapter.
resultados_split1_lora_test = evaluar_split_completo(split=1, evaluar_llm=False, entrenar_lora=False,
                                                      continuar_lora=True, evaluar_lora_train=False,
                                                      evaluar_lora_val=False, evaluar_lora_test=True)


[LoRA] Entrenando con train+val para split 1


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


Tamaño total de train+val: 1000
Entrenando modelo y guardando en ./modelo_split1_trainval


  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
50,1.6607
100,0.9779
150,0.5035
200,0.4307
250,0.4116
300,0.4068
350,0.3751
400,0.3654
450,0.3927
500,0.3876



[LoRA] Evaluando modelo final (train+val) en test


Evaluando test1 (lora): 100%|██████████| 2553/2553 [27:10<00:00,  1.57ejemplo/s]


In [None]:
resultados_split1_lora_test

{'lora_test': {'accuracy': 0.4888366627497062,
  'precision': 0.25272610647851185,
  'recall': 0.7378277153558053,
  'f1_score': 0.3764930721452461,
  'balanced_accuracy': np.float64(0.5804046947259462),
  'tp': 394,
  'fp': 1165,
  'fn': 140,
  'tn': 854}}

### Split 2

#### Train

In [None]:
# Training only: fit the LoRA model on train2 and save it under ./modelo_split2_train.
# No evaluation stage is enabled, so the returned dict is empty.
entrenamiento_split2 = evaluar_split_completo(split=2, evaluar_llm=False, entrenar_lora=True,
                                                      continuar_lora=False, evaluar_lora_train=False,
                                                      evaluar_lora_val=False, evaluar_lora_test=False)


[LoRA] Entrenando con train para split 2
Entrenando modelo y guardando en ./modelo_split2_train


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
50,1.5351
100,0.9507
150,0.472
200,0.4472
250,0.3857
300,0.4038
350,0.4076
400,0.3811
450,0.3974
500,0.388


In [None]:
# Evaluate the train-only model of split 2 on the train2 file (read in full from disk, not
# the balanced in-memory subset). Training is skipped when the model directory already exists.
resultados_split2_lora_train = evaluar_split_completo(split=2, evaluar_llm=False, entrenar_lora=True,
                                                      continuar_lora=False, evaluar_lora_train=True,
                                                      evaluar_lora_val=False, evaluar_lora_test=False)


[LoRA] Entrenando con train para split 2
Ya existe el modelo en ./modelo_split2_train, se omite entrenamiento.

[LoRA] Evaluando modelo entrenado con train en train


generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Evaluando train2 (lora): 100%|██████████| 3763/3763 [56:34<00:00,  1.11ejemplo/s]


In [None]:
resultados_split2_lora_train

{'lora_train': {'accuracy': 0.3478607494020728,
  'precision': 0.34107480421280045,
  'recall': 0.9890368050117463,
  'f1_score': 0.5072289156626506,
  'balanced_accuracy': np.float64(0.503770212642639),
  'tp': 1263,
  'fp': 2440,
  'fn': 14,
  'tn': 46}}

#### Validation

In [None]:
# Evaluate the train-only model of split 2 on the val2 file. Training is skipped when the
# model directory already exists.
resultados_split2_lora_val = evaluar_split_completo(split=2, evaluar_llm=False, entrenar_lora=True,
                                                      continuar_lora=False, evaluar_lora_train=False,
                                                      evaluar_lora_val=True, evaluar_lora_test=False)


[LoRA] Entrenando con train para split 2
Ya existe el modelo en ./modelo_split2_train, se omite entrenamiento.

[LoRA] Evaluando modelo entrenado con train en val


Evaluando val2 (lora): 100%|██████████| 2553/2553 [38:05<00:00,  1.12ejemplo/s]


In [None]:
resultados_split2_lora_val

{'lora_val': {'accuracy': 0.31139835487661577,
  'precision': 0.216404077849861,
  'recall': 0.8745318352059925,
  'f1_score': 0.34695393759286774,
  'balanced_accuracy': np.float64(0.5184942484598561),
  'tp': 467,
  'fp': 1691,
  'fn': 67,
  'tn': 328}}

#### Test

In [None]:
# Final model for split 2: balance val2 (replacing it in `datasets`), train a new model on
# train2 + val2 and evaluate it on the test2 file. The train+val model is built from the base
# checkpoint again; it does not continue from the train-only adapter.
resultados_split2_lora_test = evaluar_split_completo(split=2, evaluar_llm=False, entrenar_lora=False,
                                                      continuar_lora=True, evaluar_lora_train=False,
                                                      evaluar_lora_val=False, evaluar_lora_test=True)


[LoRA] Entrenando con train+val para split 2


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Tamaño total de train+val: 1000
Entrenando modelo y guardando en ./modelo_split2_trainval


  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
50,1.2729
100,0.8396
150,0.5186
200,0.3969
250,0.4041
300,0.4268
350,0.3713
400,0.3829
450,0.3845
500,0.3657



[LoRA] Evaluando modelo final (train+val) en test


Evaluando test2 (lora): 100%|██████████| 1877/1877 [21:15<00:00,  1.47ejemplo/s]


In [None]:
resultados_split2_lora_test

{'lora_test': {'accuracy': 0.3036760788492275,
  'precision': 0.19762062128222074,
  'recall': 0.7627551020408163,
  'f1_score': 0.31391076115485567,
  'balanced_accuracy': np.float64(0.4726233422661994),
  'tp': 299,
  'fp': 1214,
  'fn': 93,
  'tn': 271}}

### Split 3

#### Train

In [None]:
# Split 3, training only: entrenar_lora=True with every evaluation flag off.
# The log reports the model being saved to ./modelo_split3_train.
entrenamiento_split3 = evaluar_split_completo(
    split=3,
    evaluar_llm=False,
    entrenar_lora=True,
    continuar_lora=False,
    evaluar_lora_train=False,
    evaluar_lora_val=False,
    evaluar_lora_test=False,
)


[LoRA] Entrenando con train para split 3
Entrenando modelo y guardando en ./modelo_split3_train


  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
50,1.4155
100,0.897
150,0.4821
200,0.4547
250,0.3975
300,0.3907
350,0.4184
400,0.3947
450,0.3883
500,0.3856


In [None]:
# Split 3, evaluation on the train partition. entrenar_lora stays True, but the
# log shows the saved model in ./modelo_split3_train is found and training is skipped.
resultados_split3_lora_train = evaluar_split_completo(
    split=3,
    evaluar_llm=False,
    entrenar_lora=True,
    continuar_lora=False,
    evaluar_lora_train=True,
    evaluar_lora_val=False,
    evaluar_lora_test=False,
)


[LoRA] Entrenando con train para split 3
Ya existe el modelo en ./modelo_split3_train, se omite entrenamiento.

[LoRA] Evaluando modelo entrenado con train en train


Evaluando train3 (lora): 100%|██████████| 4572/4572 [53:45<00:00,  1.42ejemplo/s]


In [None]:
# Show the split-3 metrics on the training partition (key 'lora_train').
# NOTE(review): in the saved run 4301 of 4572 examples are predicted positive
# (tp + fp), so recall is ~0.96 while balanced accuracy stays at ~0.52.
resultados_split3_lora_train

{'lora_train': {'accuracy': 0.29133858267716534,
  'precision': 0.2564519879097884,
  'recall': 0.9633187772925764,
  'f1_score': 0.4050679397723099,
  'balanced_accuracy': np.float64(0.5150705354218937),
  'tp': 1103,
  'fp': 3198,
  'fn': 42,
  'tn': 229}}

#### Validation

In [None]:
# Split 3, evaluation on the validation partition. As in the train-evaluation
# cell, the log shows the existing ./modelo_split3_train model is reused.
resultados_split3_lora_val = evaluar_split_completo(
    split=3,
    evaluar_llm=False,
    entrenar_lora=True,
    continuar_lora=False,
    evaluar_lora_train=False,
    evaluar_lora_val=True,
    evaluar_lora_test=False,
)


[LoRA] Entrenando con train para split 3
Ya existe el modelo en ./modelo_split3_train, se omite entrenamiento.

[LoRA] Evaluando modelo entrenado con train en val


Evaluando val3 (lora): 100%|██████████| 1877/1877 [22:05<00:00,  1.42ejemplo/s]


In [None]:
# Show the split-3 validation metrics (key 'lora_val').
# NOTE(review): in the saved run balanced accuracy is ~0.48 (below 0.5) and
# precision is 0.20 against a recall of ~0.88.
resultados_split3_lora_val

{'lora_val': {'accuracy': 0.24134256792754397,
  'precision': 0.2,
  'recall': 0.8775510204081632,
  'f1_score': 0.32575757575757575,
  'balanced_accuracy': np.float64(0.4754758469044183),
  'tp': 344,
  'fp': 1376,
  'fn': 48,
  'tn': 109}}

#### Test

In [None]:
# Split 3, final evaluation: with continuar_lora=True the run logs
# "[LoRA] Entrenando con train+val" (saved to ./modelo_split3_trainval),
# and only the test evaluation flag is on.
resultados_split3_lora_test = evaluar_split_completo(
    split=3,
    evaluar_llm=False,
    entrenar_lora=False,
    continuar_lora=True,
    evaluar_lora_train=False,
    evaluar_lora_val=False,
    evaluar_lora_test=True,
)


[LoRA] Entrenando con train+val para split 3


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Tamaño total de train+val: 1000
Entrenando modelo y guardando en ./modelo_split3_trainval


  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
50,1.234
100,0.7989
150,0.5002
200,0.4026
250,0.4099
300,0.3839
350,0.3844
400,0.3971
450,0.4021
500,0.3758



[LoRA] Evaluando modelo final (train+val) en test


Evaluando test3 (lora): 100%|██████████| 640/640 [09:20<00:00,  1.14ejemplo/s]


In [None]:
# Show the split-3 test metrics and confusion-matrix counts (key 'lora_test').
# NOTE(review): in the saved run accuracy (~0.49) and F1 (~0.60) are higher than
# on the other partitions, but balanced accuracy is still only ~0.52.
resultados_split3_lora_test

{'lora_test': {'accuracy': 0.4859375,
  'precision': 0.4619771863117871,
  'recall': 0.8408304498269896,
  'f1_score': 0.596319018404908,
  'balanced_accuracy': np.float64(0.5172813217795916),
  'tp': 243,
  'fp': 283,
  'fn': 46,
  'tn': 68}}