# Ensemble approach

In [None]:
# Notebook switches: set both before running the cells below.
COLAB = True  # True when the notebook runs on Google Colab
PIP = True  # True when the required libraries still have to be installed

In [None]:
# Install the notebook's dependencies when PIP is enabled.
# The "!" lines are IPython shell escapes, so this cell only runs inside a notebook.
if PIP:
    # Hugging Face stack (transformers is upgraded to the newest release).
    !pip install transformers --upgrade
    !pip install datasets accelerate
    !pip install evaluate
    # Evaluation toolkit, imported further down as `pyevall`.
    !pip install -U PyEvALL

    # Numerical / machine-learning libraries and the Optuna tuner.
    !pip install torch
    !pip install numpy
    !pip install pandas
    !pip install scikit-learn
    !pip install optuna

In [None]:
import sys
from pathlib import Path

# Resolve where the project lives so that the dataset and the helper modules
# in "Functions" can be found both on Google Colab and on a local checkout.
if COLAB:
    # On Colab the project is read from the mounted Google Drive.
    from google.colab import drive

    drive.mount('/content/drive', force_remount=True)
    base_path = "/content/drive/MyDrive/EXISTS2025_TweetBusters"
    library_path = base_path + "/Functions"
else:
    # Locally the project root is the parent of the current working directory.
    base_path = Path.cwd().parent
    library_path = base_path / "Functions"

# `sys` is imported at the top of this cell because it is needed right here;
# relying on an import made by a later cell raises NameError on a fresh kernel.
sys.path.insert(0, str(library_path))
from readerEXIST2025_2 import EXISTReader

In [None]:
import os
import importlib.util
import sys
import inspect

# Directory that holds the project's helper modules.
functions_path = library_path

# Load every helper module found in `functions_path` and expose its functions
# as notebook-level globals. Files are visited in sorted order because
# os.listdir returns entries in arbitrary order: sorting guarantees that, when
# two modules define a function with the same name, the same one wins on
# every run.
for filename in sorted(os.listdir(functions_path)):
    # Only plain Python modules; skip dunder files such as __init__.py.
    if not filename.endswith(".py") or filename.startswith("__"):
        continue

    module_name = filename[:-3]
    file_path = os.path.join(functions_path, filename)

    # Load the module straight from its file path.
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    # Copy every function found in the module into the global namespace.
    for name, func in inspect.getmembers(module, inspect.isfunction):
        globals()[name] = func

In [None]:
# Location of the EXIST 2025 dataset; adapt this path to wherever the dataset is stored.
dataset_path = os.path.join(base_path, "Dataset/EXIST_2025_Dataset_V0.3/")

# One JSON file per split.
file_train = os.path.join(dataset_path, "EXIST2025_training.json")
file_dev = os.path.join(dataset_path, "EXIST2025_dev.json")
file_test = os.path.join(dataset_path, "EXIST2025_test_clean.json")

# One reader per split, kept in train / dev / test order.
reader_train = EXISTReader(file_train)
reader_dev = EXISTReader(file_dev)
reader_test = EXISTReader(file_test)
_split_readers = (reader_train, reader_dev, reader_test)

# English data: one (train, dev, test) triple per subtask.
EnTrainTask1, EnDevTask1, EnTestTask1 = (
    r.get(lang="EN", subtask="1") for r in _split_readers
)
EnTrainTask2, EnDevTask2, EnTestTask2 = (
    r.get(lang="EN", subtask="2") for r in _split_readers
)
EnTrainTask3, EnDevTask3, EnTestTask3 = (
    r.get(lang="EN", subtask="3") for r in _split_readers
)

# Spanish data: one (train, dev, test) triple per subtask.
SpTrainTask1, SpDevTask1, SpTestTask1 = (
    r.get(lang="ES", subtask="1") for r in _split_readers
)
SpTrainTask2, SpDevTask2, SpTestTask2 = (
    r.get(lang="ES", subtask="2") for r in _split_readers
)
SpTrainTask3, SpDevTask3, SpTestTask3 = (
    r.get(lang="ES", subtask="3") for r in _split_readers
)


In [None]:
import os
import sys
import tempfile
import ast

import numpy as np
import pandas as pd

import torch
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import f1_score

from transformers import AutoTokenizer, AutoModelForCausalLM

from pyevall.evaluation import PyEvALLEvaluation
from pyevall.metrics.metricfactory import MetricFactory
from pyevall.reports.reports import PyEvALLReport
from pyevall.utils.utils import PyEvALLUtils


In [None]:
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from Tasks_LoRA_Pipelines import sexism_classification_pipeline_task1_LoRA  # para Task 1
from Tasks_LoRA_Pipelines import sexism_classification_pipeline_task2_LoRA  # para Task 2
from readerEXIST2025 import EXISTReader

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def get_probs(model_ckpt_path, texts, max_length=128, batch_size=32):
    """Load a saved sequence-classification model and return class probabilities.

    Args:
        model_ckpt_path: Path or model id passed to ``from_pretrained`` for
            both the tokenizer and the model.
        texts: Sequence of input strings.
        max_length: Maximum number of tokens per text; longer inputs are
            truncated.
        batch_size: Number of texts tokenized and scored per forward pass.

    Returns:
        A NumPy array of shape (N, C) holding the softmax of the model's
        logits, one row per text. For empty ``texts`` the shape is (0, C).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_ckpt_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_ckpt_path).to(device)
    # Make evaluation mode explicit so dropout can never perturb the output.
    model.eval()

    # The tokenizer expects a plain list of strings, not an array or Series.
    texts = list(texts)
    if not texts:
        # np.vstack raises on an empty list, so build the empty result directly.
        return np.empty((0, model.config.num_labels), dtype=np.float32)

    all_probs = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        enc = tokenizer(batch, padding=True, truncation=True,
                        max_length=max_length, return_tensors='pt').to(device)
        with torch.no_grad():
            logits = model(**enc).logits
        all_probs.append(torch.softmax(logits, dim=-1).cpu().numpy())
    return np.vstack(all_probs)

# 1) Load the training and validation splits (English, Task 1).
# The validation split has to come from the dev file: reading the training
# file twice would make the "validation" set a copy of the training set.
# NOTE(review): both file names are relative to the working directory —
# adjust them if the dataset lives elsewhere.
reader = EXISTReader("EXIST2025_training.json")
dev_reader = EXISTReader("EXIST2025_dev.json")
_, _, train_texts, train_labels = reader.get(lang="EN", subtask="1")
_, _, dev_texts, dev_labels = dev_reader.get(lang="EN", subtask="1")

# 2) Base checkpoints to ensemble.
base_models = ["roberta-base", "xlm-roberta-base", "microsoft/deberta-v3-base"]

# 3) Fine-tune one LoRA model per checkpoint and collect its validation probabilities.
dev_features = []
for m in base_models:
    # Fine-tune with LoRA; the first returned value is the model used below.
    mix_model, _ = sexism_classification_pipeline_task1_LoRA(
        (None, train_texts, train_labels),
        (None, dev_texts, dev_labels),
        None,
        model_name=m,
        nlabels=2, ptype="single_label_classification",
        output_dir=f"./lora_{m}"
    )

    # Persist the model so that get_probs can reload it from disk.
    stack_dir = f"./stacks/{m}"
    mix_model.save_pretrained(stack_dir)
    # get_probs also loads the tokenizer from this directory, so the tokenizer
    # files must be written next to the weights (assuming mix_model is a
    # standard transformers model, whose save_pretrained does not write them).
    AutoTokenizer.from_pretrained(m).save_pretrained(stack_dir)

    # Validation probabilities, expected shape (N_dev, 2).
    probs = get_probs(stack_dir, dev_texts)
    # Keep only column 1, presumably the "YES" class — confirm against the
    # pipeline's label mapping.
    dev_features.append(probs[:, 1])

# 4) Build the feature matrix and fit the meta-learner.
X_dev = np.stack(dev_features, axis=1)  # (N_dev, K), one column per base model
y_dev = np.array([1 if lab == "YES" else 0 for lab in dev_labels])

meta = LogisticRegression(max_iter=200)
meta.fit(X_dev, y_dev)

# 5) Final score.
# NOTE(review): the meta-learner is scored on the same rows it was fitted on,
# so this F1 is optimistic; use held-out or cross-validated predictions for
# an unbiased estimate.
y_pred = meta.predict(X_dev)
print("F1 stacking (Task1):", f1_score(y_dev, y_pred))


In [None]:
import numpy as np
from sklearn.metrics import f1_score
from Tasks_LoRA_Pipelines import sexism_classification_pipeline_task3_LoRA
from readerEXIST2025 import EXISTReader

# 1) Load the Task 3 training and validation splits (English).
# The validation split has to come from the dev file: reading the training
# file twice would make the "validation" set a copy of the training set.
# NOTE(review): both file names are relative to the working directory —
# adjust them if the dataset lives elsewhere.
reader = EXISTReader("EXIST2025_training.json", task=3)
dev_reader3 = EXISTReader("EXIST2025_dev.json", task=3)
_, _, train_texts3, train_labels3 = reader.get(lang="EN", subtask="3")
_, _, dev_texts3, dev_labels3 = dev_reader3.get(lang="EN", subtask="3")

# 2) Fine-tune a single LoRA model for the multi-label task; its sigmoid
# probabilities are computed further down.
mix3, _ = sexism_classification_pipeline_task3_LoRA(
    (None, train_texts3, train_labels3),
    (None, dev_texts3, dev_labels3),
    None,
    model_name='roberta-base',
    nlabels=6,
    ptype="multi_label_classification",
    output_dir="./lora_task3"
)

# recarga y predice sobre validación:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# The tokenizer comes from the base checkpoint; the classifier is read from
# the LoRA output directory and then moved to the inference device.
tokenizer3 = AutoTokenizer.from_pretrained('roberta-base')
model3 = AutoModelForSequenceClassification.from_pretrained("./lora_task3")
model3 = model3.to(device)

# Compute logits and pass them through a sigmoid.
def get_multilabel_probs(model, tokenizer, texts, batch_size=32):
    """Return independent per-label probabilities for a multi-label classifier.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
            It is switched to evaluation mode.
        tokenizer: Tokenizer matching ``model``.
        texts: Sequence of input strings.
        batch_size: Number of texts tokenized and scored per forward pass.

    Returns:
        A NumPy array with one row per text and one column per label holding
        the element-wise sigmoid of the logits. For empty ``texts`` the shape
        is (0, model.config.num_labels).
    """
    # The tokenizer expects a plain list of strings, not an array or Series.
    texts = list(texts)
    if not texts:
        # np.vstack raises on an empty list, so build the empty result directly.
        return np.empty((0, model.config.num_labels), dtype=np.float32)

    # Make evaluation mode explicit so dropout can never perturb the output.
    model.eval()
    all_probs = []
    for start in range(0, len(texts), batch_size):
        batch = tokenizer(texts[start:start + batch_size], padding=True,
                          truncation=True, return_tensors='pt').to(device)
        with torch.no_grad():
            logits = model(**batch).logits
        all_probs.append(torch.sigmoid(logits).cpu().numpy())
    return np.vstack(all_probs)

# Validation-set probabilities from the Task 3 model.
probs_dev3 = get_multilabel_probs(model3, tokenizer3, dev_texts3)  # (N_dev, 6)

# Multi-hot ground truth: column k is 1 when label id k is contained in the
# example's label collection.
# NOTE(review): assumes every entry of dev_labels3 is a collection of integer
# ids in 0-5 — confirm against the reader's output format.
y_true3 = np.array([
    [int(label_id in labels) for label_id in range(6)]
    for labels in dev_labels3
])

# 3) Per-label threshold search
def tune_thresholds(y_true, y_probs, n_steps=101):
    """Pick, for every label, the decision threshold with the best F1.

    Args:
        y_true: 2-D array of 0/1 ground-truth indicators, one column per label.
        y_probs: 2-D array of predicted probabilities, indexed like ``y_true``.
        n_steps: Number of evenly spaced candidate thresholds in [0, 1].

    Returns:
        A list with one threshold per label. When several candidates tie for
        the best F1 the lowest one is kept. A label for which no candidate
        reaches a positive F1 keeps the neutral default of 0.5.
    """
    thresholds = np.linspace(0, 1, n_steps)
    best_ts = []
    for j in range(y_true.shape[1]):
        # Start from F1 = 0 rather than a negative value: otherwise the very
        # first candidate (t = 0) always replaces the 0.5 default, and a label
        # whose F1 is 0 everywhere would end up with threshold 0.0, i.e.
        # predicted positive for every sample.
        best_f1, best_t = 0.0, 0.5
        for t in thresholds:
            preds_j = (y_probs[:, j] >= t).astype(int)
            # zero_division=0 keeps the score at 0 for degenerate columns
            # without emitting a warning for each candidate threshold.
            f1_j = f1_score(y_true[:, j], preds_j, zero_division=0)
            if f1_j > best_f1:
                best_f1, best_t = f1_j, t
        best_ts.append(best_t)
    return best_ts

# Tune one threshold per label on the validation probabilities.
best_thresholds = tune_thresholds(y_true3, probs_dev3)
print("Umbrales óptimos por etiqueta:", best_thresholds)

# 4) Apply the tuned thresholds to the test split.
# The texts have to come from the test file; reusing the training reader
# would produce predictions for the training data instead.
# NOTE(review): the file name is relative to the working directory — adjust
# it if the dataset lives elsewhere.
test_reader3 = EXISTReader("EXIST2025_test_clean.json", task=3)
# Only the first three returned fields are kept; the third one holds the texts.
_, _, test_texts3 = test_reader3.get(lang="EN", subtask="3")[:3]
probs_test3 = get_multilabel_probs(model3, tokenizer3, test_texts3)
# Broadcast the per-label thresholds across the rows of the probability matrix.
preds_test3 = (probs_test3 >= np.asarray(best_thresholds)).astype(int)
